1 /* $NetBSD: ix_txrx.c,v 1.54 2019/07/04 08:56:35 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * This feature only works with
74 * IPv4, and only on 82599 and later.
75 * It also causes IP forwarding to
76 * fail, and unlike LRO that cannot be
77 * controlled by the stack. For all these
78 * reasons it is best left off; there is
79 * no runtime tuneable interface, so
80 * enabling it requires setting this to
81 * TRUE and recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88 * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (adapter->link_active != LINK_STATE_UP) {
140 /*
141 * Discard all packets buffered in IFQ to avoid
142 * sending stale packets when the link comes back up.
143 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149 if (txr->txr_no_space)
150 return (ENETDOWN);
151
152 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 break;
155
156 IFQ_POLL(&ifp->if_snd, m_head);
157 if (m_head == NULL)
158 break;
159
160 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 break;
162 }
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head, BPF_D_OUT);
171 }
172
173 return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175
176 /************************************************************************
177 * ixgbe_legacy_start
178 *
179 * Called by the stack, this always uses the first tx ring,
180 * and should not be used with multiqueue tx enabled.
181 ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 struct adapter *adapter = ifp->if_softc;
186 struct tx_ring *txr = adapter->tx_rings;
187
188 if (ifp->if_flags & IFF_RUNNING) {
189 IXGBE_TX_LOCK(txr);
190 ixgbe_legacy_start_locked(ifp, txr);
191 IXGBE_TX_UNLOCK(txr);
192 }
193 } /* ixgbe_legacy_start */
194
195 /************************************************************************
196 * ixgbe_mq_start - Multiqueue Transmit Entry Point
197 *
198 * (if_transmit function)
199 ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 struct adapter *adapter = ifp->if_softc;
204 struct tx_ring *txr;
205 int i;
206 #ifdef RSS
207 uint32_t bucket_id;
208 #endif
209
210 /*
211 * When doing RSS, map it to the same outbound queue
212 * as the incoming flow would be mapped to.
213 *
214 * If everything is set up correctly, it should be the
215 * same bucket as the one the current CPU maps to.
216 */
217 #ifdef RSS
218 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 &bucket_id) == 0)) {
222 i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 if (bucket_id > adapter->num_queues)
225 if_printf(ifp,
226 "bucket_id (%d) > num_queues (%d)\n",
227 bucket_id, adapter->num_queues);
228 #endif
229 } else
230 i = m->m_pkthdr.flowid % adapter->num_queues;
231 } else
232 #endif /* RSS */
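/* Without RSS, spread flows across queues based on the current CPU. */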
233 i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
234
235 /* Check for a hung queue and pick alternative */
236 if (((1ULL << i) & adapter->active_queues) == 0)
237 i = ffs64(adapter->active_queues);
238
239 txr = &adapter->tx_rings[i];
240
241 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
242 m_freem(m);
243 txr->pcq_drops.ev_count++;
244 return ENOBUFS;
245 }
246 if (IXGBE_TX_TRYLOCK(txr)) {
247 ixgbe_mq_start_locked(ifp, txr);
248 IXGBE_TX_UNLOCK(txr);
249 } else {
250 if (adapter->txrx_use_workqueue) {
251 u_int *enqueued;
252
253 /*
254 * This function itself is not called in interrupt
255 * context, but it can be called in fast softint
256 * context right after receiving forwarded packets.
257 * So the workqueue must be protected against double
258 * enqueueing when the machine handles both locally
259 * generated and forwarded packets.
260 */
261 enqueued = percpu_getref(adapter->txr_wq_enqueued);
262 if (*enqueued == 0) {
263 *enqueued = 1;
264 percpu_putref(adapter->txr_wq_enqueued);
265 workqueue_enqueue(adapter->txr_wq,
266 &txr->wq_cookie, curcpu());
267 } else
268 percpu_putref(adapter->txr_wq_enqueued);
269 } else
270 softint_schedule(txr->txr_si);
271 }
272
273 return (0);
274 } /* ixgbe_mq_start */
275
276 /************************************************************************
277 * ixgbe_mq_start_locked
278 ************************************************************************/
279 int
280 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
281 {
282 struct mbuf *next;
283 int enqueued = 0, err = 0;
284
285 if (txr->adapter->link_active != LINK_STATE_UP) {
286 /*
287 * Discard all packets buffered in txr_interq to avoid
288 * sending stale packets when the link comes back up.
289 */
290 ixgbe_drain(ifp, txr);
291 return (ENETDOWN);
292 }
293 if ((ifp->if_flags & IFF_RUNNING) == 0)
294 return (ENETDOWN);
295 if (txr->txr_no_space)
296 return (ENETDOWN);
297
298 /* Process the queue */
299 while ((next = pcq_get(txr->txr_interq)) != NULL) {
300 if ((err = ixgbe_xmit(txr, next)) != 0) {
301 m_freem(next);
302 /* All errors are counted in ixgbe_xmit() */
303 break;
304 }
305 enqueued++;
306 #if __FreeBSD_version >= 1100036
307 /*
308 * Since we're looking at the tx ring, we can check
309 * to see if we're a VF by examining our tail register
310 * address.
311 */
312 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
313 (next->m_flags & M_MCAST))
314 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
315 #endif
316 /* Send a copy of the frame to the BPF listener */
317 bpf_mtap(ifp, next, BPF_D_OUT);
318 if ((ifp->if_flags & IFF_RUNNING) == 0)
319 break;
320 }
321
322 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
323 ixgbe_txeof(txr);
324
325 return (err);
326 } /* ixgbe_mq_start_locked */
327
328 /************************************************************************
329 * ixgbe_deferred_mq_start
330 *
331 * Called from a softint and workqueue (indirectly) to drain queued
332 * transmit packets.
333 ************************************************************************/
334 void
335 ixgbe_deferred_mq_start(void *arg)
336 {
337 struct tx_ring *txr = arg;
338 struct adapter *adapter = txr->adapter;
339 struct ifnet *ifp = adapter->ifp;
340
341 IXGBE_TX_LOCK(txr);
342 if (pcq_peek(txr->txr_interq) != NULL)
343 ixgbe_mq_start_locked(ifp, txr);
344 IXGBE_TX_UNLOCK(txr);
345 } /* ixgbe_deferred_mq_start */
346
347 /************************************************************************
348 * ixgbe_deferred_mq_start_work
349 *
350 * Called from a workqueue to drain queued transmit packets.
351 ************************************************************************/
352 void
353 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
354 {
355 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
356 struct adapter *adapter = txr->adapter;
357 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
358 *enqueued = 0;
359 percpu_putref(adapter->txr_wq_enqueued);
360
361 ixgbe_deferred_mq_start(txr);
362 } /* ixgbe_deferred_mq_start_work */
363
364 /************************************************************************
365 * ixgbe_drain_all
366 ************************************************************************/
367 void
368 ixgbe_drain_all(struct adapter *adapter)
369 {
370 struct ifnet *ifp = adapter->ifp;
371 struct ix_queue *que = adapter->queues;
372
373 for (int i = 0; i < adapter->num_queues; i++, que++) {
374 struct tx_ring *txr = que->txr;
375
376 IXGBE_TX_LOCK(txr);
377 ixgbe_drain(ifp, txr);
378 IXGBE_TX_UNLOCK(txr);
379 }
380 }
381
382 /************************************************************************
383 * ixgbe_xmit
384 *
385 * Maps the mbufs to tx descriptors, allowing the
386 * TX engine to transmit the packets.
387 *
388 * Return 0 on success, positive on failure
389 ************************************************************************/
390 static int
391 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
392 {
393 struct adapter *adapter = txr->adapter;
394 struct ixgbe_tx_buf *txbuf;
395 union ixgbe_adv_tx_desc *txd = NULL;
396 struct ifnet *ifp = adapter->ifp;
397 int i, j, error;
398 int first;
399 u32 olinfo_status = 0, cmd_type_len;
400 bool remap = TRUE;
401 bus_dmamap_t map;
402
403 /* Basic descriptor defines */
404 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
405 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
406
407 if (vlan_has_tag(m_head))
408 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
409
410 /*
411 * Important to capture the first descriptor
412 * used because it will contain the index of
413 * the one we tell the hardware to report back
414 */
415 first = txr->next_avail_desc;
416 txbuf = &txr->tx_buffers[first];
417 map = txbuf->map;
418
419 /*
420 * Map the packet for DMA.
421 */
422 retry:
423 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
424 BUS_DMA_NOWAIT);
425
426 if (__predict_false(error)) {
427 struct mbuf *m;
428
429 switch (error) {
430 case EAGAIN:
431 txr->q_eagain_tx_dma_setup++;
432 return EAGAIN;
433 case ENOMEM:
434 txr->q_enomem_tx_dma_setup++;
435 return EAGAIN;
436 case EFBIG:
437 /* Try it again? - one try */
438 if (remap == TRUE) {
439 remap = FALSE;
440 /*
441 * XXX: m_defrag will choke on
442 * non-MCLBYTES-sized clusters
443 */
444 txr->q_efbig_tx_dma_setup++;
445 m = m_defrag(m_head, M_NOWAIT);
446 if (m == NULL) {
447 txr->q_mbuf_defrag_failed++;
448 return ENOBUFS;
449 }
450 m_head = m;
451 goto retry;
452 } else {
453 txr->q_efbig2_tx_dma_setup++;
454 return error;
455 }
456 case EINVAL:
457 txr->q_einval_tx_dma_setup++;
458 return error;
459 default:
460 txr->q_other_tx_dma_setup++;
461 return error;
462 }
463 }
464
465 /* Make certain there are enough descriptors */
466 if (txr->tx_avail < (map->dm_nsegs + 2)) {
467 txr->txr_no_space = true;
468 txr->no_desc_avail.ev_count++;
469 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
470 return EAGAIN;
471 }
472
473 /*
474 * Set up the appropriate offload context
475 * this will consume the first descriptor
476 */
477 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
478 if (__predict_false(error)) {
479 return (error);
480 }
481
482 /* Do the flow director magic */
483 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
484 (txr->atr_sample) && (!adapter->fdir_reinit)) {
485 ++txr->atr_count;
486 if (txr->atr_count >= atr_sample_rate) {
487 ixgbe_atr(txr, m_head);
488 txr->atr_count = 0;
489 }
490 }
491
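/*
 * CC (check context): have the hardware apply the context descriptor
 * written by ixgbe_tx_ctx_setup() above to this packet.
 */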
492 olinfo_status |= IXGBE_ADVTXD_CC;
493 i = txr->next_avail_desc;
494 for (j = 0; j < map->dm_nsegs; j++) {
495 bus_size_t seglen;
496 bus_addr_t segaddr;
497
498 txbuf = &txr->tx_buffers[i];
499 txd = &txr->tx_base[i];
500 seglen = map->dm_segs[j].ds_len;
501 segaddr = htole64(map->dm_segs[j].ds_addr);
502
503 txd->read.buffer_addr = segaddr;
504 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
505 txd->read.olinfo_status = htole32(olinfo_status);
506
507 if (++i == txr->num_desc)
508 i = 0;
509 }
510
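/*
 * Mark the last data descriptor: EOP ends the frame and RS asks the
 * hardware to report (write back) its completion status.
 */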
511 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
512 txr->tx_avail -= map->dm_nsegs;
513 txr->next_avail_desc = i;
514
515 txbuf->m_head = m_head;
516 /*
517 * Here we swap the map so the last descriptor,
518 * which gets the completion interrupt has the
519 * real map, and the first descriptor gets the
520 * unused map from this descriptor.
521 */
522 txr->tx_buffers[first].map = txbuf->map;
523 txbuf->map = map;
524 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
525 BUS_DMASYNC_PREWRITE);
526
527 /* Set the EOP descriptor that will be marked done */
528 txbuf = &txr->tx_buffers[first];
529 txbuf->eop = txd;
530
531 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
532 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
533 /*
534 * Advance the Transmit Descriptor Tail (TDT); this tells the
535 * hardware that this frame is available to transmit.
536 */
537 ++txr->total_packets.ev_count;
538 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
539
540 /*
541 * XXXX NOMPSAFE: ifp->if_data should be percpu.
542 */
543 ifp->if_obytes += m_head->m_pkthdr.len;
544 if (m_head->m_flags & M_MCAST)
545 ifp->if_omcasts++;
546
547 /* Mark queue as having work */
548 if (txr->busy == 0)
549 txr->busy = 1;
550
551 return (0);
552 } /* ixgbe_xmit */
553
554 /************************************************************************
555 * ixgbe_drain
556 ************************************************************************/
557 static void
558 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
559 {
560 struct mbuf *m;
561
562 IXGBE_TX_LOCK_ASSERT(txr);
563
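/* The legacy if_snd queue is shared, so only the first queue drains it. */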
564 if (txr->me == 0) {
565 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
566 IFQ_DEQUEUE(&ifp->if_snd, m);
567 m_freem(m);
568 IF_DROP(&ifp->if_snd);
569 }
570 }
571
572 while ((m = pcq_get(txr->txr_interq)) != NULL) {
573 m_freem(m);
574 txr->pcq_drops.ev_count++;
575 }
576 }
577
578 /************************************************************************
579 * ixgbe_allocate_transmit_buffers
580 *
581 * Allocate memory for tx_buffer structures. The tx_buffer stores all
582 * the information needed to transmit a packet on the wire. This is
583 * called only once at attach; setup is done on every reset.
584 ************************************************************************/
585 static int
586 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
587 {
588 struct adapter *adapter = txr->adapter;
589 device_t dev = adapter->dev;
590 struct ixgbe_tx_buf *txbuf;
591 int error, i;
592
593 /*
594 * Setup DMA descriptor areas.
595 */
596 error = ixgbe_dma_tag_create(
597 /* parent */ adapter->osdep.dmat,
598 /* alignment */ 1,
599 /* bounds */ 0,
600 /* maxsize */ IXGBE_TSO_SIZE,
601 /* nsegments */ adapter->num_segs,
602 /* maxsegsize */ PAGE_SIZE,
603 /* flags */ 0,
604 &txr->txtag);
605 if (error != 0) {
606 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
607 goto fail;
608 }
609
610 txr->tx_buffers =
611 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
612 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
613 if (txr->tx_buffers == NULL) {
614 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
615 error = ENOMEM;
616 goto fail;
617 }
618
619 /* Create the descriptor buffer dma maps */
620 txbuf = txr->tx_buffers;
621 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
622 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
623 if (error != 0) {
624 aprint_error_dev(dev,
625 "Unable to create TX DMA map (%d)\n", error);
626 goto fail;
627 }
628 }
629
630 return 0;
631 fail:
632 /* We free everything; this handles the case where we fail in the middle */
633 #if 0 /* XXX was FreeBSD */
634 ixgbe_free_transmit_structures(adapter);
635 #else
636 ixgbe_free_transmit_buffers(txr);
637 #endif
638 return (error);
639 } /* ixgbe_allocate_transmit_buffers */
640
641 /************************************************************************
642 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
643 ************************************************************************/
644 static void
645 ixgbe_setup_transmit_ring(struct tx_ring *txr)
646 {
647 struct adapter *adapter = txr->adapter;
648 struct ixgbe_tx_buf *txbuf;
649 #ifdef DEV_NETMAP
650 struct netmap_adapter *na = NA(adapter->ifp);
651 struct netmap_slot *slot;
652 #endif /* DEV_NETMAP */
653
654 /* Clear the old ring contents */
655 IXGBE_TX_LOCK(txr);
656
657 #ifdef DEV_NETMAP
658 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
659 /*
660 * (under lock): if in netmap mode, do some consistency
661 * checks and set slot to entry 0 of the netmap ring.
662 */
663 slot = netmap_reset(na, NR_TX, txr->me, 0);
664 }
665 #endif /* DEV_NETMAP */
666
667 bzero((void *)txr->tx_base,
668 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
669 /* Reset indices */
670 txr->next_avail_desc = 0;
671 txr->next_to_clean = 0;
672
673 /* Free any existing tx buffers. */
674 txbuf = txr->tx_buffers;
675 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
676 if (txbuf->m_head != NULL) {
677 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
678 0, txbuf->m_head->m_pkthdr.len,
679 BUS_DMASYNC_POSTWRITE);
680 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
681 m_freem(txbuf->m_head);
682 txbuf->m_head = NULL;
683 }
684
685 #ifdef DEV_NETMAP
686 /*
687 * In netmap mode, set the map for the packet buffer.
688 * NOTE: Some drivers (not this one) also need to set
689 * the physical buffer address in the NIC ring.
690 * Slots in the netmap ring (indexed by "si") are
691 * kring->nkr_hwofs positions "ahead" wrt the
692 * corresponding slot in the NIC ring. In some drivers
693 * (not here) nkr_hwofs can be negative. Function
694 * netmap_idx_n2k() handles wraparounds properly.
695 */
696 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
697 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
698 netmap_load_map(na, txr->txtag,
699 txbuf->map, NMB(na, slot + si));
700 }
701 #endif /* DEV_NETMAP */
702
703 /* Clear the EOP descriptor pointer */
704 txbuf->eop = NULL;
705 }
706
707 /* Set the rate at which we sample packets */
708 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
709 txr->atr_sample = atr_sample_rate;
710
711 /* Set number of descriptors available */
712 txr->tx_avail = adapter->num_tx_desc;
713
714 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
715 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
716 IXGBE_TX_UNLOCK(txr);
717 } /* ixgbe_setup_transmit_ring */
718
719 /************************************************************************
720 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
721 ************************************************************************/
722 int
723 ixgbe_setup_transmit_structures(struct adapter *adapter)
724 {
725 struct tx_ring *txr = adapter->tx_rings;
726
727 for (int i = 0; i < adapter->num_queues; i++, txr++)
728 ixgbe_setup_transmit_ring(txr);
729
730 return (0);
731 } /* ixgbe_setup_transmit_structures */
732
733 /************************************************************************
734 * ixgbe_free_transmit_structures - Free all transmit rings.
735 ************************************************************************/
736 void
737 ixgbe_free_transmit_structures(struct adapter *adapter)
738 {
739 struct tx_ring *txr = adapter->tx_rings;
740
741 for (int i = 0; i < adapter->num_queues; i++, txr++) {
742 ixgbe_free_transmit_buffers(txr);
743 ixgbe_dma_free(adapter, &txr->txdma);
744 IXGBE_TX_LOCK_DESTROY(txr);
745 }
746 free(adapter->tx_rings, M_DEVBUF);
747 } /* ixgbe_free_transmit_structures */
748
749 /************************************************************************
750 * ixgbe_free_transmit_buffers
751 *
752 * Free transmit ring related data structures.
753 ************************************************************************/
754 static void
755 ixgbe_free_transmit_buffers(struct tx_ring *txr)
756 {
757 struct adapter *adapter = txr->adapter;
758 struct ixgbe_tx_buf *tx_buffer;
759 int i;
760
761 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
762
763 if (txr->tx_buffers == NULL)
764 return;
765
766 tx_buffer = txr->tx_buffers;
767 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
768 if (tx_buffer->m_head != NULL) {
769 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
770 0, tx_buffer->m_head->m_pkthdr.len,
771 BUS_DMASYNC_POSTWRITE);
772 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
773 m_freem(tx_buffer->m_head);
774 tx_buffer->m_head = NULL;
775 if (tx_buffer->map != NULL) {
776 ixgbe_dmamap_destroy(txr->txtag,
777 tx_buffer->map);
778 tx_buffer->map = NULL;
779 }
780 } else if (tx_buffer->map != NULL) {
781 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
782 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
783 tx_buffer->map = NULL;
784 }
785 }
786 if (txr->txr_interq != NULL) {
787 struct mbuf *m;
788
789 while ((m = pcq_get(txr->txr_interq)) != NULL)
790 m_freem(m);
791 pcq_destroy(txr->txr_interq);
792 }
793 if (txr->tx_buffers != NULL) {
794 free(txr->tx_buffers, M_DEVBUF);
795 txr->tx_buffers = NULL;
796 }
797 if (txr->txtag != NULL) {
798 ixgbe_dma_tag_destroy(txr->txtag);
799 txr->txtag = NULL;
800 }
801 } /* ixgbe_free_transmit_buffers */
802
803 /************************************************************************
804 * ixgbe_tx_ctx_setup
805 *
806 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
807 ************************************************************************/
808 static int
809 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
810 u32 *cmd_type_len, u32 *olinfo_status)
811 {
812 struct adapter *adapter = txr->adapter;
813 struct ixgbe_adv_tx_context_desc *TXD;
814 struct ether_vlan_header *eh;
815 #ifdef INET
816 struct ip *ip;
817 #endif
818 #ifdef INET6
819 struct ip6_hdr *ip6;
820 #endif
821 int ehdrlen, ip_hlen = 0;
822 int offload = TRUE;
823 int ctxd = txr->next_avail_desc;
824 u32 vlan_macip_lens = 0;
825 u32 type_tucmd_mlhl = 0;
826 u16 vtag = 0;
827 u16 etype;
828 u8 ipproto = 0;
829 char *l3d;
830
831
832 /* First check if TSO is to be used */
833 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
834 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
835
836 if (rv != 0)
837 ++adapter->tso_err.ev_count;
838 return rv;
839 }
840
841 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
842 offload = FALSE;
843
844 /* Indicate the whole packet as payload when not doing TSO */
845 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
846
847 /* Now ready a context descriptor */
848 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
849
850 /*
851 * In advanced descriptors the vlan tag must
852 * be placed into the context descriptor. Hence
853 * we need to make one even if not doing offloads.
854 */
855 if (vlan_has_tag(mp)) {
856 vtag = htole16(vlan_get_tag(mp));
857 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
858 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
859 (offload == FALSE))
860 return (0);
861
862 /*
863 * Determine where frame payload starts.
864 * Jump over vlan headers if already present,
865 * helpful for QinQ too.
866 */
867 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
868 eh = mtod(mp, struct ether_vlan_header *);
869 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
870 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
871 etype = ntohs(eh->evl_proto);
872 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
873 } else {
874 etype = ntohs(eh->evl_encap_proto);
875 ehdrlen = ETHER_HDR_LEN;
876 }
877
878 /* Set the ether header length */
879 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
880
881 if (offload == FALSE)
882 goto no_offloads;
883
884 /*
885 * If the first mbuf only includes the ethernet header,
886 * jump to the next one
887 * XXX: This assumes the stack splits mbufs containing headers
888 * on header boundaries
889 * XXX: And assumes the entire IP header is contained in one mbuf
890 */
891 if (mp->m_len == ehdrlen && mp->m_next)
892 l3d = mtod(mp->m_next, char *);
893 else
894 l3d = mtod(mp, char *) + ehdrlen;
895
896 switch (etype) {
897 #ifdef INET
898 case ETHERTYPE_IP:
899 ip = (struct ip *)(l3d);
900 ip_hlen = ip->ip_hl << 2;
901 ipproto = ip->ip_p;
902 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
903 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
904 ip->ip_sum == 0);
905 break;
906 #endif
907 #ifdef INET6
908 case ETHERTYPE_IPV6:
909 ip6 = (struct ip6_hdr *)(l3d);
910 ip_hlen = sizeof(struct ip6_hdr);
911 ipproto = ip6->ip6_nxt;
912 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
913 break;
914 #endif
915 default:
916 offload = false;
917 break;
918 }
919
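/* IXSM requests insertion of the IPv4 header checksum. */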
920 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
921 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
922
923 vlan_macip_lens |= ip_hlen;
924
925 /* No support for offloads for non-L4 next headers */
926 switch (ipproto) {
927 case IPPROTO_TCP:
928 if (mp->m_pkthdr.csum_flags &
929 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
930 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
931 else
932 offload = false;
933 break;
934 case IPPROTO_UDP:
935 if (mp->m_pkthdr.csum_flags &
936 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
937 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
938 else
939 offload = false;
940 break;
941 default:
942 offload = false;
943 break;
944 }
945
946 if (offload) /* Insert L4 checksum into data descriptors */
947 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
948
949 no_offloads:
950 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
951
952 /* Now copy bits into descriptor */
953 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
954 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
955 TXD->seqnum_seed = htole32(0);
956 TXD->mss_l4len_idx = htole32(0);
957
958 /* We've consumed the first desc, adjust counters */
959 if (++ctxd == txr->num_desc)
960 ctxd = 0;
961 txr->next_avail_desc = ctxd;
962 --txr->tx_avail;
963
964 return (0);
965 } /* ixgbe_tx_ctx_setup */
966
967 /************************************************************************
968 * ixgbe_tso_setup
969 *
970 * Setup work for hardware segmentation offload (TSO) on
971 * adapters using advanced tx descriptors
972 ************************************************************************/
973 static int
974 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
975 u32 *olinfo_status)
976 {
977 struct ixgbe_adv_tx_context_desc *TXD;
978 struct ether_vlan_header *eh;
979 #ifdef INET6
980 struct ip6_hdr *ip6;
981 #endif
982 #ifdef INET
983 struct ip *ip;
984 #endif
985 struct tcphdr *th;
986 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
987 u32 vlan_macip_lens = 0;
988 u32 type_tucmd_mlhl = 0;
989 u32 mss_l4len_idx = 0, paylen;
990 u16 vtag = 0, eh_type;
991
992 /*
993 * Determine where frame payload starts.
994 * Jump over vlan headers if already present
995 */
996 eh = mtod(mp, struct ether_vlan_header *);
997 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
998 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
999 eh_type = eh->evl_proto;
1000 } else {
1001 ehdrlen = ETHER_HDR_LEN;
1002 eh_type = eh->evl_encap_proto;
1003 }
1004
1005 switch (ntohs(eh_type)) {
1006 #ifdef INET
1007 case ETHERTYPE_IP:
1008 ip = (struct ip *)(mp->m_data + ehdrlen);
1009 if (ip->ip_p != IPPROTO_TCP)
1010 return (ENXIO);
1011 ip->ip_sum = 0;
1012 ip_hlen = ip->ip_hl << 2;
1013 th = (struct tcphdr *)((char *)ip + ip_hlen);
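/*
 * Seed the TCP checksum field with the pseudo-header sum (without
 * the length), which is what the hardware expects for TSO.
 */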
1014 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1015 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1016 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1017 /* Tell transmit desc to also do IPv4 checksum. */
1018 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1019 break;
1020 #endif
1021 #ifdef INET6
1022 case ETHERTYPE_IPV6:
1023 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1024 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1025 if (ip6->ip6_nxt != IPPROTO_TCP)
1026 return (ENXIO);
1027 ip_hlen = sizeof(struct ip6_hdr);
1028 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1029 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1030 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1031 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1032 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1033 break;
1034 #endif
1035 default:
1036 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1037 __func__, ntohs(eh_type));
1038 break;
1039 }
1040
1041 ctxd = txr->next_avail_desc;
1042 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1043
1044 tcp_hlen = th->th_off << 2;
1045
1046 /* This is used in the transmit desc in encap */
1047 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1048
1049 /* VLAN MACLEN IPLEN */
1050 if (vlan_has_tag(mp)) {
1051 vtag = htole16(vlan_get_tag(mp));
1052 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1053 }
1054
1055 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1056 vlan_macip_lens |= ip_hlen;
1057 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1058
1059 /* ADV DTYPE TUCMD */
1060 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1061 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1062 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1063
1064 /* MSS L4LEN IDX */
1065 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1066 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1067 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1068
1069 TXD->seqnum_seed = htole32(0);
1070
1071 if (++ctxd == txr->num_desc)
1072 ctxd = 0;
1073
1074 txr->tx_avail--;
1075 txr->next_avail_desc = ctxd;
1076 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1077 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1078 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1079 ++txr->tso_tx.ev_count;
1080
1081 return (0);
1082 } /* ixgbe_tso_setup */
1083
1084
1085 /************************************************************************
1086 * ixgbe_txeof
1087 *
1088 * Examine each tx_buffer in the used queue. If the hardware is done
1089 * processing the packet then free associated resources. The
1090 * tx_buffer is put back on the free queue.
1091 ************************************************************************/
1092 bool
1093 ixgbe_txeof(struct tx_ring *txr)
1094 {
1095 struct adapter *adapter = txr->adapter;
1096 struct ifnet *ifp = adapter->ifp;
1097 struct ixgbe_tx_buf *buf;
1098 union ixgbe_adv_tx_desc *txd;
1099 u32 work, processed = 0;
1100 u32 limit = adapter->tx_process_limit;
1101
1102 KASSERT(mutex_owned(&txr->tx_mtx));
1103
1104 #ifdef DEV_NETMAP
1105 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1106 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1107 struct netmap_adapter *na = NA(adapter->ifp);
1108 struct netmap_kring *kring = na->tx_rings[txr->me];
1109 txd = txr->tx_base;
1110 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1111 BUS_DMASYNC_POSTREAD);
1112 /*
1113 * In netmap mode, all the work is done in the context
1114 * of the client thread. Interrupt handlers only wake up
1115 * clients, which may be sleeping on individual rings
1116 * or on a global resource for all rings.
1117 * To implement tx interrupt mitigation, we wake up the client
1118 * thread roughly every half ring, even if the NIC interrupts
1119 * more frequently. This is implemented as follows:
1120 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1121 * the slot that should wake up the thread (nkr_num_slots
1122 * means the user thread should not be woken up);
1123 * - the driver ignores tx interrupts unless netmap_mitigate=0
1124 * or the slot has the DD bit set.
1125 */
1126 if (kring->nr_kflags < kring->nkr_num_slots &&
1127 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
1128 netmap_tx_irq(ifp, txr->me);
1129 }
1130 return false;
1131 }
1132 #endif /* DEV_NETMAP */
1133
1134 if (txr->tx_avail == txr->num_desc) {
1135 txr->busy = 0;
1136 return false;
1137 }
1138
1139 /* Get work starting point */
1140 work = txr->next_to_clean;
1141 buf = &txr->tx_buffers[work];
1142 txd = &txr->tx_base[work];
1143 work -= txr->num_desc; /* The distance to ring end */
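/*
 * "work" is now a negative offset from the end of the ring, so the
 * wrap check below only has to test for it reaching zero.
 */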
1144 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1145 BUS_DMASYNC_POSTREAD);
1146
1147 do {
1148 union ixgbe_adv_tx_desc *eop = buf->eop;
1149 if (eop == NULL) /* No work */
1150 break;
1151
1152 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1153 break; /* I/O not complete */
1154
1155 if (buf->m_head) {
1156 txr->bytes += buf->m_head->m_pkthdr.len;
1157 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1158 0, buf->m_head->m_pkthdr.len,
1159 BUS_DMASYNC_POSTWRITE);
1160 ixgbe_dmamap_unload(txr->txtag, buf->map);
1161 m_freem(buf->m_head);
1162 buf->m_head = NULL;
1163 }
1164 buf->eop = NULL;
1165 txr->txr_no_space = false;
1166 ++txr->tx_avail;
1167
1168 /* We clean the range if multi segment */
1169 while (txd != eop) {
1170 ++txd;
1171 ++buf;
1172 ++work;
1173 /* wrap the ring? */
1174 if (__predict_false(!work)) {
1175 work -= txr->num_desc;
1176 buf = txr->tx_buffers;
1177 txd = txr->tx_base;
1178 }
1179 if (buf->m_head) {
1180 txr->bytes +=
1181 buf->m_head->m_pkthdr.len;
1182 bus_dmamap_sync(txr->txtag->dt_dmat,
1183 buf->map,
1184 0, buf->m_head->m_pkthdr.len,
1185 BUS_DMASYNC_POSTWRITE);
1186 ixgbe_dmamap_unload(txr->txtag,
1187 buf->map);
1188 m_freem(buf->m_head);
1189 buf->m_head = NULL;
1190 }
1191 ++txr->tx_avail;
1192 buf->eop = NULL;
1193
1194 }
1195 ++txr->packets;
1196 ++processed;
1197 ++ifp->if_opackets;
1198
1199 /* Try the next packet */
1200 ++txd;
1201 ++buf;
1202 ++work;
1203 /* reset with a wrap */
1204 if (__predict_false(!work)) {
1205 work -= txr->num_desc;
1206 buf = txr->tx_buffers;
1207 txd = txr->tx_base;
1208 }
1209 prefetch(txd);
1210 } while (__predict_true(--limit));
1211
1212 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1213 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1214
1215 work += txr->num_desc;
1216 txr->next_to_clean = work;
1217
1218 /*
1219 * Queue hang detection: we know there's
1220 * work outstanding or the early return above
1221 * would have been taken, so increment busy
1222 * if nothing managed to get cleaned; then
1223 * in local_timer it will be checked and
1224 * marked as HUNG if it exceeds the maximum attempts.
1225 */
1226 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1227 ++txr->busy;
1228 /*
1229 * If anything gets cleaned we reset the state to 1;
1230 * note this will turn off HUNG if it's set.
1231 */
1232 if (processed)
1233 txr->busy = 1;
1234
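/* Everything reclaimed; the queue is idle, so clear the hang counter. */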
1235 if (txr->tx_avail == txr->num_desc)
1236 txr->busy = 0;
1237
1238 return ((limit > 0) ? false : true);
1239 } /* ixgbe_txeof */
1240
1241 /************************************************************************
1242 * ixgbe_rsc_count
1243 *
1244 * Used to detect a descriptor that has been merged by Hardware RSC.
1245 ************************************************************************/
1246 static inline u32
1247 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1248 {
1249 return (le32toh(rx->wb.lower.lo_dword.data) &
1250 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1251 } /* ixgbe_rsc_count */
1252
1253 /************************************************************************
1254 * ixgbe_setup_hw_rsc
1255 *
1256 * Initialize Hardware RSC (LRO) feature on 82599
1257 * for an RX ring; this is toggled by the LRO capability
1258 * even though it is transparent to the stack.
1259 *
1260 * NOTE: Since this HW feature only works with IPv4 and
1261 * testing has shown soft LRO to be as effective,
1262 * this feature will be disabled by default.
1263 ************************************************************************/
1264 static void
1265 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1266 {
1267 struct adapter *adapter = rxr->adapter;
1268 struct ixgbe_hw *hw = &adapter->hw;
1269 u32 rscctrl, rdrxctl;
1270
1271 /* If turning LRO/RSC off we need to disable it */
1272 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1273 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1274 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl); /* write back the cleared RSCEN bit */
1275 return;
1276 }
1277
1278 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1279 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1280 #ifdef DEV_NETMAP
1281 /* Always strip CRC unless Netmap disabled it */
1282 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1283 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1284 ix_crcstrip)
1285 #endif /* DEV_NETMAP */
1286 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1287 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1288 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1289
1290 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1291 rscctrl |= IXGBE_RSCCTL_RSCEN;
1292 /*
1293 * Limit the total number of descriptors that
1294 * can be combined, so it does not exceed 64K
1295 */
1296 if (rxr->mbuf_sz == MCLBYTES)
1297 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1298 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1299 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1300 else if (rxr->mbuf_sz == MJUM9BYTES)
1301 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1302 else /* Using 16K cluster */
1303 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1304
1305 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1306
1307 /* Enable TCP header recognition */
1308 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1309 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1310
1311 /* Disable RSC for ACK packets */
1312 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1313 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1314
1315 rxr->hw_rsc = TRUE;
1316 } /* ixgbe_setup_hw_rsc */
1317
1318 /************************************************************************
1319 * ixgbe_refresh_mbufs
1320 *
1321 * Refresh mbuf buffers for RX descriptor rings.
1322 * - now keeps its own state so discards due to resource
1323 * exhaustion are unnecessary; if an mbuf cannot be obtained
1324 * it just returns, keeping its placeholder, so it can simply
1325 * be called again later to retry.
1326 ************************************************************************/
1327 static void
1328 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1329 {
1330 struct adapter *adapter = rxr->adapter;
1331 struct ixgbe_rx_buf *rxbuf;
1332 struct mbuf *mp;
1333 int i, j, error;
1334 bool refreshed = false;
1335
1336 i = j = rxr->next_to_refresh;
1337 /* Control the loop with one beyond */
1338 if (++j == rxr->num_desc)
1339 j = 0;
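/*
 * "j" runs one slot ahead of "i", so the refresh stops before reaching
 * "limit", the next descriptor the host will examine.
 */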
1340
1341 while (j != limit) {
1342 rxbuf = &rxr->rx_buffers[i];
1343 if (rxbuf->buf == NULL) {
1344 mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1345 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1346 if (mp == NULL) {
1347 rxr->no_jmbuf.ev_count++;
1348 goto update;
1349 }
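/*
 * If the frame fits in a standard cluster, shift the payload by
 * ETHER_ALIGN so the IP header ends up 4-byte aligned.
 */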
1350 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1351 m_adj(mp, ETHER_ALIGN);
1352 } else
1353 mp = rxbuf->buf;
1354
1355 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1356
1357 /* If we're dealing with an mbuf that was copied rather
1358 * than replaced, there's no need to go through busdma.
1359 */
1360 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1361 /* Get the memory mapping */
1362 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1363 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1364 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1365 if (error != 0) {
1366 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1367 m_free(mp);
1368 rxbuf->buf = NULL;
1369 goto update;
1370 }
1371 rxbuf->buf = mp;
1372 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1373 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1374 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1375 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1376 } else {
1377 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1378 rxbuf->flags &= ~IXGBE_RX_COPY;
1379 }
1380
1381 refreshed = true;
1382 /* Next is precalculated */
1383 i = j;
1384 rxr->next_to_refresh = i;
1385 if (++j == rxr->num_desc)
1386 j = 0;
1387 }
1388
1389 update:
1390 if (refreshed) /* Update hardware tail index */
1391 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1392
1393 return;
1394 } /* ixgbe_refresh_mbufs */
1395
1396 /************************************************************************
1397 * ixgbe_allocate_receive_buffers
1398 *
1399 * Allocate memory for rx_buffer structures. Since we use one
1400 * rx_buffer per received packet, the maximum number of rx_buffers
1401 * that we'll need is equal to the number of receive descriptors
1402 * that we've allocated.
1403 ************************************************************************/
1404 static int
1405 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1406 {
1407 struct adapter *adapter = rxr->adapter;
1408 device_t dev = adapter->dev;
1409 struct ixgbe_rx_buf *rxbuf;
1410 int bsize, error;
1411
1412 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1413 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1414 M_NOWAIT | M_ZERO);
1415 if (rxr->rx_buffers == NULL) {
1416 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1417 error = ENOMEM;
1418 goto fail;
1419 }
1420
1421 error = ixgbe_dma_tag_create(
1422 /* parent */ adapter->osdep.dmat,
1423 /* alignment */ 1,
1424 /* bounds */ 0,
1425 /* maxsize */ MJUM16BYTES,
1426 /* nsegments */ 1,
1427 /* maxsegsize */ MJUM16BYTES,
1428 /* flags */ 0,
1429 &rxr->ptag);
1430 if (error != 0) {
1431 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1432 goto fail;
1433 }
1434
1435 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1436 rxbuf = &rxr->rx_buffers[i];
1437 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1438 if (error) {
1439 aprint_error_dev(dev, "Unable to create RX dma map\n");
1440 goto fail;
1441 }
1442 }
1443
1444 return (0);
1445
1446 fail:
1447 /* Frees all, but can handle partial completion */
1448 ixgbe_free_receive_structures(adapter);
1449
1450 return (error);
1451 } /* ixgbe_allocate_receive_buffers */
1452
1453 /************************************************************************
1454 * ixgbe_free_receive_ring
1455 ************************************************************************/
1456 static void
1457 ixgbe_free_receive_ring(struct rx_ring *rxr)
1458 {
1459 for (int i = 0; i < rxr->num_desc; i++) {
1460 ixgbe_rx_discard(rxr, i);
1461 }
1462 } /* ixgbe_free_receive_ring */
1463
1464 /************************************************************************
1465 * ixgbe_setup_receive_ring
1466 *
1467 * Initialize a receive ring and its buffers.
1468 ************************************************************************/
1469 static int
1470 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1471 {
1472 struct adapter *adapter;
1473 struct ixgbe_rx_buf *rxbuf;
1474 #ifdef LRO
1475 struct ifnet *ifp;
1476 struct lro_ctrl *lro = &rxr->lro;
1477 #endif /* LRO */
1478 #ifdef DEV_NETMAP
1479 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1480 struct netmap_slot *slot;
1481 #endif /* DEV_NETMAP */
1482 int rsize, error = 0;
1483
1484 adapter = rxr->adapter;
1485 #ifdef LRO
1486 ifp = adapter->ifp;
1487 #endif /* LRO */
1488
1489 /* Clear the ring contents */
1490 IXGBE_RX_LOCK(rxr);
1491
1492 #ifdef DEV_NETMAP
1493 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1494 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1495 #endif /* DEV_NETMAP */
1496
1497 rsize = roundup2(adapter->num_rx_desc *
1498 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1499 bzero((void *)rxr->rx_base, rsize);
1500 /* Cache the size */
1501 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1502
1503 /* Free current RX buffer structs and their mbufs */
1504 ixgbe_free_receive_ring(rxr);
1505
1506 IXGBE_RX_UNLOCK(rxr);
1507 /*
1508 * Now reinitialize our supply of jumbo mbufs. The number
1509 * or size of jumbo mbufs may have changed.
1510 * We assume all rxr->ptag values are the same.
1511 */
1512 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1513 (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1514
1515 IXGBE_RX_LOCK(rxr);
1516
1517 /* Now replenish the mbufs */
1518 for (int j = 0; j != rxr->num_desc; ++j) {
1519 struct mbuf *mp;
1520
1521 rxbuf = &rxr->rx_buffers[j];
1522
1523 #ifdef DEV_NETMAP
1524 /*
1525 * In netmap mode, fill the map and set the buffer
1526 * address in the NIC ring, considering the offset
1527 * between the netmap and NIC rings (see comment in
1528 * ixgbe_setup_transmit_ring() ). No need to allocate
1529 * an mbuf, so end the block with a continue;
1530 */
1531 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1532 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1533 uint64_t paddr;
1534 void *addr;
1535
1536 addr = PNMB(na, slot + sj, &paddr);
1537 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1538 /* Update descriptor and the cached value */
1539 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1540 rxbuf->addr = htole64(paddr);
1541 continue;
1542 }
1543 #endif /* DEV_NETMAP */
1544
1545 rxbuf->flags = 0;
1546 rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1547 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1548 if (rxbuf->buf == NULL) {
1549 error = ENOBUFS;
1550 goto fail;
1551 }
1552 mp = rxbuf->buf;
1553 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1554 /* Get the memory mapping */
1555 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1556 mp, BUS_DMA_NOWAIT);
1557 if (error != 0)
1558 goto fail;
1559 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1560 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1561 /* Update the descriptor and the cached value */
1562 rxr->rx_base[j].read.pkt_addr =
1563 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1564 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1565 }
1566
1567
1568 /* Setup our descriptor indices */
1569 rxr->next_to_check = 0;
1570 rxr->next_to_refresh = 0;
1571 rxr->lro_enabled = FALSE;
1572 rxr->rx_copies.ev_count = 0;
1573 #if 0 /* NetBSD */
1574 rxr->rx_bytes.ev_count = 0;
1575 #if 1 /* Fix inconsistency */
1576 rxr->rx_packets.ev_count = 0;
1577 #endif
1578 #endif
1579 rxr->vtag_strip = FALSE;
1580
1581 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1582 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1583
1584 /*
1585 * Now set up the LRO interface
1586 */
1587 if (ixgbe_rsc_enable)
1588 ixgbe_setup_hw_rsc(rxr);
1589 #ifdef LRO
1590 else if (ifp->if_capenable & IFCAP_LRO) {
1591 device_t dev = adapter->dev;
1592 int err = tcp_lro_init(lro);
1593 if (err) {
1594 device_printf(dev, "LRO Initialization failed!\n");
1595 goto fail;
1596 }
1597 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1598 rxr->lro_enabled = TRUE;
1599 lro->ifp = adapter->ifp;
1600 }
1601 #endif /* LRO */
1602
1603 IXGBE_RX_UNLOCK(rxr);
1604
1605 return (0);
1606
1607 fail:
1608 ixgbe_free_receive_ring(rxr);
1609 IXGBE_RX_UNLOCK(rxr);
1610
1611 return (error);
1612 } /* ixgbe_setup_receive_ring */
1613
1614 /************************************************************************
1615 * ixgbe_setup_receive_structures - Initialize all receive rings.
1616 ************************************************************************/
1617 int
1618 ixgbe_setup_receive_structures(struct adapter *adapter)
1619 {
1620 struct rx_ring *rxr = adapter->rx_rings;
1621 int j;
1622
1623 for (j = 0; j < adapter->num_queues; j++, rxr++)
1624 if (ixgbe_setup_receive_ring(rxr))
1625 goto fail;
1626
1627 return (0);
1628 fail:
1629 /*
1630 * Free RX buffers allocated so far; we will only handle
1631 * the rings that completed, since the failing case will have
1632 * cleaned up for itself. Ring 'j' failed, so it is the terminus.
1633 */
1634 for (int i = 0; i < j; ++i) {
1635 rxr = &adapter->rx_rings[i];
1636 IXGBE_RX_LOCK(rxr);
1637 ixgbe_free_receive_ring(rxr);
1638 IXGBE_RX_UNLOCK(rxr);
1639 }
1640
1641 return (ENOBUFS);
1642 } /* ixgbe_setup_receive_structures */
1643
1644
1645 /************************************************************************
1646 * ixgbe_free_receive_structures - Free all receive rings.
1647 ************************************************************************/
1648 void
1649 ixgbe_free_receive_structures(struct adapter *adapter)
1650 {
1651 struct rx_ring *rxr = adapter->rx_rings;
1652
1653 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1654
1655 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1656 ixgbe_free_receive_buffers(rxr);
1657 #ifdef LRO
1658 /* Free LRO memory */
1659 tcp_lro_free(&rxr->lro);
1660 #endif /* LRO */
1661 /* Free the ring memory as well */
1662 ixgbe_dma_free(adapter, &rxr->rxdma);
1663 IXGBE_RX_LOCK_DESTROY(rxr);
1664 }
1665
1666 free(adapter->rx_rings, M_DEVBUF);
1667 } /* ixgbe_free_receive_structures */
1668
1669
1670 /************************************************************************
1671 * ixgbe_free_receive_buffers - Free receive ring data structures
1672 ************************************************************************/
1673 static void
1674 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1675 {
1676 struct adapter *adapter = rxr->adapter;
1677 struct ixgbe_rx_buf *rxbuf;
1678
1679 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1680
1681 /* Cleanup any existing buffers */
1682 if (rxr->rx_buffers != NULL) {
1683 for (int i = 0; i < adapter->num_rx_desc; i++) {
1684 rxbuf = &rxr->rx_buffers[i];
1685 ixgbe_rx_discard(rxr, i);
1686 if (rxbuf->pmap != NULL) {
1687 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1688 rxbuf->pmap = NULL;
1689 }
1690 }
1691 if (rxr->rx_buffers != NULL) {
1692 free(rxr->rx_buffers, M_DEVBUF);
1693 rxr->rx_buffers = NULL;
1694 }
1695 }
1696
1697 if (rxr->ptag != NULL) {
1698 ixgbe_dma_tag_destroy(rxr->ptag);
1699 rxr->ptag = NULL;
1700 }
1701
1702 return;
1703 } /* ixgbe_free_receive_buffers */
1704
1705 /************************************************************************
1706 * ixgbe_rx_input
1707 ************************************************************************/
1708 static __inline void
1709 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1710 u32 ptype)
1711 {
1712 struct adapter *adapter = ifp->if_softc;
1713
1714 #ifdef LRO
1715 struct ethercom *ec = &adapter->osdep.ec;
1716
1717 /*
1718 * At the moment LRO is only for IP/TCP packets whose TCP checksum has
1719 * been computed by hardware; the packet also must not carry a VLAN tag
1720 * in the ethernet header. For IPv6 we do not yet support ext. hdrs.
1721 */
1722 if (rxr->lro_enabled &&
1723 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1724 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1725 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1726 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1727 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1728 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1729 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1730 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1731 /*
1732 * Send to the stack if:
1733 ** - LRO not enabled, or
1734 ** - no LRO resources, or
1735 ** - lro enqueue fails
1736 */
1737 if (rxr->lro.lro_cnt != 0)
1738 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1739 return;
1740 }
1741 #endif /* LRO */
1742
1743 if_percpuq_enqueue(adapter->ipq, m);
1744 } /* ixgbe_rx_input */
1745
1746 /************************************************************************
1747 * ixgbe_rx_discard
1748 ************************************************************************/
1749 static __inline void
1750 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1751 {
1752 struct ixgbe_rx_buf *rbuf;
1753
1754 rbuf = &rxr->rx_buffers[i];
1755
1756 /*
1757 * With advanced descriptors the writeback
1758 * clobbers the buffer addrs, so it's easier
1759 * to just free the existing mbufs and take
1760 * the normal refresh path to get new buffers
1761 * and mapping.
1762 */
1763
1764 if (rbuf->fmp != NULL) {/* Partial chain ? */
1765 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1766 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1767 m_freem(rbuf->fmp);
1768 rbuf->fmp = NULL;
1769 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1770 } else if (rbuf->buf) {
1771 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1772 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1773 m_free(rbuf->buf);
1774 rbuf->buf = NULL;
1775 }
1776 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1777
1778 rbuf->flags = 0;
1779
1780 return;
1781 } /* ixgbe_rx_discard */
1782
1783
1784 /************************************************************************
1785 * ixgbe_rxeof
1786 *
1787  *   Executes in interrupt context. It replenishes the
1788  *   mbufs in the descriptor ring and passes data that has
1789  *   been DMA'd into host memory up to the upper layers.
1790 *
1791 * Return TRUE for more work, FALSE for all clean.
1792 ************************************************************************/
1793 bool
1794 ixgbe_rxeof(struct ix_queue *que)
1795 {
1796 struct adapter *adapter = que->adapter;
1797 struct rx_ring *rxr = que->rxr;
1798 struct ifnet *ifp = adapter->ifp;
1799 #ifdef LRO
1800 struct lro_ctrl *lro = &rxr->lro;
1801 #endif /* LRO */
1802 union ixgbe_adv_rx_desc *cur;
1803 struct ixgbe_rx_buf *rbuf, *nbuf;
1804 int i, nextp, processed = 0;
1805 u32 staterr = 0;
1806 u32 count = adapter->rx_process_limit;
1807 #ifdef RSS
1808 u16 pkt_info;
1809 #endif
1810
1811 IXGBE_RX_LOCK(rxr);
1812
1813 #ifdef DEV_NETMAP
1814 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1815 		/* Same as the txeof routine: wake up clients on interrupt. */
1816 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1817 IXGBE_RX_UNLOCK(rxr);
1818 return (FALSE);
1819 }
1820 }
1821 #endif /* DEV_NETMAP */
1822
1823 for (i = rxr->next_to_check; count != 0;) {
1824 struct mbuf *sendmp, *mp;
1825 u32 rsc, ptype;
1826 u16 len;
1827 u16 vtag = 0;
1828 bool eop;
1829
1830 /* Sync the ring. */
1831 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1832 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
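		/*
		 * Note: this POSTREAD/POSTWRITE sync makes the hardware's
		 * writeback visible before the descriptor is inspected; the
		 * matching PREREAD/PREWRITE sync happens at next_desc below,
		 * after status_error has been cleared for this slot.
		 */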
1833
1834 cur = &rxr->rx_base[i];
1835 staterr = le32toh(cur->wb.upper.status_error);
1836 #ifdef RSS
1837 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1838 #endif
1839
1840 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1841 break;
1842
1843 count--;
1844 sendmp = NULL;
1845 nbuf = NULL;
1846 rsc = 0;
1847 cur->wb.upper.status_error = 0;
1848 rbuf = &rxr->rx_buffers[i];
1849 mp = rbuf->buf;
1850
1851 len = le16toh(cur->wb.upper.length);
1852 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1853 IXGBE_RXDADV_PKTTYPE_MASK;
1854 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1855
1856 /* Make sure bad packets are discarded */
1857 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1858 #if __FreeBSD_version >= 1100036
1859 if (adapter->feat_en & IXGBE_FEATURE_VF)
1860 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1861 #endif
1862 rxr->rx_discarded.ev_count++;
1863 ixgbe_rx_discard(rxr, i);
1864 goto next_desc;
1865 }
1866
1867 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1868 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1869
1870 		/*
1871 		 * On the 82599, which supports a hardware
1872 		 * LRO (called HW RSC), packets need not be
1873 		 * fragmented across sequential descriptors;
1874 		 * instead the next descriptor is indicated
1875 		 * in bits of the current descriptor.
1876 		 * This also means that we might process
1877 		 * more than one packet at a time, something
1878 		 * that was never true before; it required
1879 		 * eliminating the global chain pointers
1880 		 * in favor of what we are doing here. -jfv
1881 		 */
1882 if (!eop) {
1883 /*
1884 * Figure out the next descriptor
1885 * of this frame.
1886 */
1887 if (rxr->hw_rsc == TRUE) {
1888 rsc = ixgbe_rsc_count(cur);
1889 rxr->rsc_num += (rsc - 1);
1890 }
1891 if (rsc) { /* Get hardware index */
1892 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1893 IXGBE_RXDADV_NEXTP_SHIFT);
1894 } else { /* Just sequential */
1895 nextp = i + 1;
1896 if (nextp == adapter->num_rx_desc)
1897 nextp = 0;
1898 }
1899 nbuf = &rxr->rx_buffers[nextp];
1900 prefetch(nbuf);
1901 }
1902 /*
1903 * Rather than using the fmp/lmp global pointers
1904 * we now keep the head of a packet chain in the
1905 * buffer struct and pass this along from one
1906 * descriptor to the next, until we get EOP.
1907 */
1908 mp->m_len = len;
1909 		/*
1910 		 * See if there is a stored chain head that
1911 		 * determines whether this is a secondary frag.
1912 		 */
1913 sendmp = rbuf->fmp;
1914 if (sendmp != NULL) { /* secondary frag */
1915 rbuf->buf = rbuf->fmp = NULL;
1916 mp->m_flags &= ~M_PKTHDR;
1917 sendmp->m_pkthdr.len += mp->m_len;
1918 } else {
1919 /*
1920 * Optimize. This might be a small packet,
1921 * maybe just a TCP ACK. Do a fast copy that
1922 * is cache aligned into a new mbuf, and
1923 * leave the old mbuf+cluster for re-use.
1924 */
1925 if (eop && len <= IXGBE_RX_COPY_LEN) {
1926 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1927 if (sendmp != NULL) {
1928 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1929 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1930 len);
1931 sendmp->m_len = len;
1932 rxr->rx_copies.ev_count++;
1933 rbuf->flags |= IXGBE_RX_COPY;
1934 }
1935 }
1936 if (sendmp == NULL) {
1937 rbuf->buf = rbuf->fmp = NULL;
1938 sendmp = mp;
1939 }
1940
1941 			/* First desc of a non-packet-split chain */
1942 sendmp->m_flags |= M_PKTHDR;
1943 sendmp->m_pkthdr.len = mp->m_len;
1944 }
1945 ++processed;
1946
1947 /* Pass the head pointer on */
1948 if (eop == 0) {
1949 nbuf->fmp = sendmp;
1950 sendmp = NULL;
1951 mp->m_next = nbuf->buf;
1952 } else { /* Sending this frame */
1953 m_set_rcvif(sendmp, ifp);
1954 ++rxr->packets;
1955 rxr->rx_packets.ev_count++;
1956 /* capture data for AIM */
1957 rxr->bytes += sendmp->m_pkthdr.len;
1958 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1959 /* Process vlan info */
1960 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1961 vtag = le16toh(cur->wb.upper.vlan);
1962 if (vtag) {
1963 vlan_set_tag(sendmp, vtag);
1964 }
1965 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1966 ixgbe_rx_checksum(staterr, sendmp, ptype,
1967 &adapter->stats.pf);
1968 }
1969
1970 #if 0 /* FreeBSD */
1971 /*
1972 * In case of multiqueue, we have RXCSUM.PCSD bit set
1973 * and never cleared. This means we have RSS hash
1974 * available to be used.
1975 */
1976 if (adapter->num_queues > 1) {
1977 sendmp->m_pkthdr.flowid =
1978 le32toh(cur->wb.lower.hi_dword.rss);
1979 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1980 case IXGBE_RXDADV_RSSTYPE_IPV4:
1981 M_HASHTYPE_SET(sendmp,
1982 M_HASHTYPE_RSS_IPV4);
1983 break;
1984 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1985 M_HASHTYPE_SET(sendmp,
1986 M_HASHTYPE_RSS_TCP_IPV4);
1987 break;
1988 case IXGBE_RXDADV_RSSTYPE_IPV6:
1989 M_HASHTYPE_SET(sendmp,
1990 M_HASHTYPE_RSS_IPV6);
1991 break;
1992 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1993 M_HASHTYPE_SET(sendmp,
1994 M_HASHTYPE_RSS_TCP_IPV6);
1995 break;
1996 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1997 M_HASHTYPE_SET(sendmp,
1998 M_HASHTYPE_RSS_IPV6_EX);
1999 break;
2000 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2001 M_HASHTYPE_SET(sendmp,
2002 M_HASHTYPE_RSS_TCP_IPV6_EX);
2003 break;
2004 #if __FreeBSD_version > 1100000
2005 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2006 M_HASHTYPE_SET(sendmp,
2007 M_HASHTYPE_RSS_UDP_IPV4);
2008 break;
2009 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2010 M_HASHTYPE_SET(sendmp,
2011 M_HASHTYPE_RSS_UDP_IPV6);
2012 break;
2013 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2014 M_HASHTYPE_SET(sendmp,
2015 M_HASHTYPE_RSS_UDP_IPV6_EX);
2016 break;
2017 #endif
2018 default:
2019 M_HASHTYPE_SET(sendmp,
2020 M_HASHTYPE_OPAQUE_HASH);
2021 }
2022 } else {
2023 sendmp->m_pkthdr.flowid = que->msix;
2024 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2025 }
2026 #endif
2027 }
2028 next_desc:
2029 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2030 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2031
2032 /* Advance our pointers to the next descriptor. */
2033 if (++i == rxr->num_desc)
2034 i = 0;
2035
2036 /* Now send to the stack or do LRO */
2037 if (sendmp != NULL) {
2038 rxr->next_to_check = i;
2039 IXGBE_RX_UNLOCK(rxr);
2040 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2041 IXGBE_RX_LOCK(rxr);
2042 i = rxr->next_to_check;
2043 }
2044
2045 		/* Refresh mbufs every 8 processed descriptors */
2046 if (processed == 8) {
2047 ixgbe_refresh_mbufs(rxr, i);
2048 processed = 0;
2049 }
2050 }
2051
2052 /* Refresh any remaining buf structs */
2053 if (ixgbe_rx_unrefreshed(rxr))
2054 ixgbe_refresh_mbufs(rxr, i);
2055
2056 rxr->next_to_check = i;
2057
2058 IXGBE_RX_UNLOCK(rxr);
2059
2060 #ifdef LRO
2061 /*
2062 * Flush any outstanding LRO work
2063 */
2064 tcp_lro_flush_all(lro);
2065 #endif /* LRO */
2066
2067 /*
2068 * Still have cleaning to do?
2069 */
2070 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2071 return (TRUE);
2072
2073 return (FALSE);
2074 } /* ixgbe_rxeof */
2075
2076
2077 /************************************************************************
2078 * ixgbe_rx_checksum
2079 *
2080 * Verify that the hardware indicated that the checksum is valid.
2081  *   Inform the stack about the status of the checksum so that the
2082  *   stack doesn't spend time verifying it in software.
2083 ************************************************************************/
2084 static void
2085 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2086 struct ixgbe_hw_stats *stats)
2087 {
2088 u16 status = (u16)staterr;
2089 u8 errors = (u8)(staterr >> 24);
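	/*
	 * As the casts above imply, the advanced descriptor packs the
	 * status bits into the low 16 bits of staterr and the error bits
	 * into the top byte, which is where IXGBE_RXD_ERR_IPE and
	 * IXGBE_RXD_ERR_TCPE are tested below.
	 */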
2090 #if 0
2091 bool sctp = false;
2092
2093 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2094 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2095 sctp = true;
2096 #endif
2097
2098 /* IPv4 checksum */
2099 if (status & IXGBE_RXD_STAT_IPCS) {
2100 stats->ipcs.ev_count++;
2101 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2102 /* IP Checksum Good */
2103 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2104 } else {
2105 stats->ipcs_bad.ev_count++;
2106 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2107 }
2108 }
2109 /* TCP/UDP/SCTP checksum */
2110 if (status & IXGBE_RXD_STAT_L4CS) {
2111 stats->l4cs.ev_count++;
2112 int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2113 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2114 mp->m_pkthdr.csum_flags |= type;
2115 } else {
2116 stats->l4cs_bad.ev_count++;
2117 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2118 }
2119 }
2120 } /* ixgbe_rx_checksum */
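
/*
 * A minimal sketch (not part of this driver) of how a consumer of the
 * mbuf could interpret the flags set above; the field and the M_CSUM_*
 * names match those used in this function, while the control flow is
 * purely illustrative:
 *
 *	if (m->m_pkthdr.csum_flags & M_CSUM_IPv4) {
 *		if (m->m_pkthdr.csum_flags & M_CSUM_IPv4_BAD)
 *			;	// hardware flagged a bad IPv4 header checksum
 *		else
 *			;	// checksum already verified, skip software check
 *	}
 */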
2121
2122 /************************************************************************
2123 * ixgbe_dma_malloc
2124 ************************************************************************/
2125 int
2126 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2127 struct ixgbe_dma_alloc *dma, const int mapflags)
2128 {
2129 device_t dev = adapter->dev;
2130 int r, rsegs;
2131
2132 r = ixgbe_dma_tag_create(
2133 /* parent */ adapter->osdep.dmat,
2134 /* alignment */ DBA_ALIGN,
2135 /* bounds */ 0,
2136 /* maxsize */ size,
2137 /* nsegments */ 1,
2138 /* maxsegsize */ size,
2139 /* flags */ BUS_DMA_ALLOCNOW,
2140 &dma->dma_tag);
2141 if (r != 0) {
2142 aprint_error_dev(dev,
2143 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2144 r);
2145 goto fail_0;
2146 }
2147
2148 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2149 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2150 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2151 if (r != 0) {
2152 aprint_error_dev(dev,
2153 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2154 goto fail_1;
2155 }
2156
2157 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2158 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2159 if (r != 0) {
2160 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2161 __func__, r);
2162 goto fail_2;
2163 }
2164
2165 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2166 if (r != 0) {
2167 		aprint_error_dev(dev,
2168 		    "%s: ixgbe_dmamap_create failed; error %d\n", __func__, r);
2169 goto fail_3;
2170 }
2171
2172 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2173 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2174 if (r != 0) {
2175 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2176 __func__, r);
2177 goto fail_4;
2178 }
2179 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2180 dma->dma_size = size;
2181 return 0;
2182 fail_4:
2183 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2184 fail_3:
2185 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2186 fail_2:
2187 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2188 fail_1:
2189 ixgbe_dma_tag_destroy(dma->dma_tag);
2190 fail_0:
2191
2192 return (r);
2193 } /* ixgbe_dma_malloc */
2194
2195 /************************************************************************
2196 * ixgbe_dma_free
2197 ************************************************************************/
2198 void
2199 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2200 {
2201 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2202 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2203 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2204 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2205 ixgbe_dma_tag_destroy(dma->dma_tag);
2206 } /* ixgbe_dma_free */
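
/*
 * Typical use of the pair above, as in ixgbe_allocate_queues() below:
 * allocate a DMA-safe descriptor ring, record its kernel virtual and
 * bus addresses, and release it with ixgbe_dma_free() on teardown or
 * on an error path.  A minimal sketch (error handling omitted):
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		return ENOMEM;
 *	txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
 *	...
 *	ixgbe_dma_free(adapter, &txr->txdma);
 */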
2207
2208
2209 /************************************************************************
2210 * ixgbe_allocate_queues
2211 *
2212 * Allocate memory for the transmit and receive rings, and then
2213  *   the descriptors associated with each; called only once at attach.
2214 ************************************************************************/
2215 int
2216 ixgbe_allocate_queues(struct adapter *adapter)
2217 {
2218 device_t dev = adapter->dev;
2219 struct ix_queue *que;
2220 struct tx_ring *txr;
2221 struct rx_ring *rxr;
2222 int rsize, tsize, error = IXGBE_SUCCESS;
2223 int txconf = 0, rxconf = 0;
2224
2225 /* First, allocate the top level queue structs */
2226 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2227 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2228 if (adapter->queues == NULL) {
2229 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2230 error = ENOMEM;
2231 goto fail;
2232 }
2233
2234 /* Second, allocate the TX ring struct memory */
2235 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2236 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2237 if (adapter->tx_rings == NULL) {
2238 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2239 error = ENOMEM;
2240 goto tx_fail;
2241 }
2242
2243 /* Third, allocate the RX ring */
2244 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2245 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2246 if (adapter->rx_rings == NULL) {
2247 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2248 error = ENOMEM;
2249 goto rx_fail;
2250 }
2251
2252 /* For the ring itself */
2253 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2254 DBA_ALIGN);
2255
2256 /*
2257 	 * Now set up the TX queues; txconf is needed to handle the
2258 	 * possibility that things fail midcourse and we need to
2259 	 * undo the memory allocations gracefully.
2260 */
2261 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2262 /* Set up some basics */
2263 txr = &adapter->tx_rings[i];
2264 txr->adapter = adapter;
2265 txr->txr_interq = NULL;
2266 /* In case SR-IOV is enabled, align the index properly */
2267 #ifdef PCI_IOV
2268 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2269 i);
2270 #else
2271 txr->me = i;
2272 #endif
2273 txr->num_desc = adapter->num_tx_desc;
2274
2275 /* Initialize the TX side lock */
2276 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2277
2278 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2279 BUS_DMA_NOWAIT)) {
2280 aprint_error_dev(dev,
2281 "Unable to allocate TX Descriptor memory\n");
2282 error = ENOMEM;
2283 goto err_tx_desc;
2284 }
2285 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2286 bzero((void *)txr->tx_base, tsize);
2287
2288 /* Now allocate transmit buffers for the ring */
2289 if (ixgbe_allocate_transmit_buffers(txr)) {
2290 aprint_error_dev(dev,
2291 "Critical Failure setting up transmit buffers\n");
2292 error = ENOMEM;
2293 goto err_tx_desc;
2294 }
2295 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2296 /* Allocate a buf ring */
2297 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2298 if (txr->txr_interq == NULL) {
2299 aprint_error_dev(dev,
2300 "Critical Failure setting up buf ring\n");
2301 error = ENOMEM;
2302 goto err_tx_desc;
2303 }
2304 }
2305 }
2306
2307 /*
2308 * Next the RX queues...
2309 */
2310 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2311 DBA_ALIGN);
2312 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2313 rxr = &adapter->rx_rings[i];
2314 /* Set up some basics */
2315 rxr->adapter = adapter;
2316 #ifdef PCI_IOV
2317 /* In case SR-IOV is enabled, align the index properly */
2318 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2319 i);
2320 #else
2321 rxr->me = i;
2322 #endif
2323 rxr->num_desc = adapter->num_rx_desc;
2324
2325 /* Initialize the RX side lock */
2326 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2327
2328 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2329 BUS_DMA_NOWAIT)) {
2330 aprint_error_dev(dev,
2331 			    "Unable to allocate RX Descriptor memory\n");
2332 error = ENOMEM;
2333 goto err_rx_desc;
2334 }
2335 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2336 bzero((void *)rxr->rx_base, rsize);
2337
2338 /* Allocate receive buffers for the ring */
2339 if (ixgbe_allocate_receive_buffers(rxr)) {
2340 aprint_error_dev(dev,
2341 "Critical Failure setting up receive buffers\n");
2342 error = ENOMEM;
2343 goto err_rx_desc;
2344 }
2345 }
2346
2347 /*
2348 * Finally set up the queue holding structs
2349 */
2350 for (int i = 0; i < adapter->num_queues; i++) {
2351 que = &adapter->queues[i];
2352 que->adapter = adapter;
2353 que->me = i;
2354 que->txr = &adapter->tx_rings[i];
2355 que->rxr = &adapter->rx_rings[i];
2356
2357 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2358 que->disabled_count = 0;
2359 }
2360
2361 return (0);
2362
2363 err_rx_desc:
2364 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2365 ixgbe_dma_free(adapter, &rxr->rxdma);
2366 err_tx_desc:
2367 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2368 ixgbe_dma_free(adapter, &txr->txdma);
2369 free(adapter->rx_rings, M_DEVBUF);
2370 rx_fail:
2371 free(adapter->tx_rings, M_DEVBUF);
2372 tx_fail:
2373 free(adapter->queues, M_DEVBUF);
2374 fail:
2375 return (error);
2376 } /* ixgbe_allocate_queues */
2377