1 /* $NetBSD: ix_txrx.c,v 1.52 2019/02/22 06:49:15 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73 * this feature only works with
74 * IPv4, and only on 82599 and later.
75  * It also causes IP forwarding to fail,
76  * and unlike LRO that cannot be worked
77  * around by the stack. For all these
78  * reasons it is best left off; there is
79  * no tunable interface, and enabling it
80  * requires changing the flag below and
81  * recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
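
/*
 * Illustrative sketch only (not part of the upstream driver logic):
 * enabling HW RSC requires a recompile with the flag above flipped, e.g.
 *
 *	static bool ixgbe_rsc_enable = TRUE;
 *
 * and IFCAP_LRO enabled on the interface, since ixgbe_setup_hw_rsc()
 * below only programs RSCCTL when the LRO capability is on.
 */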
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88  * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
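
/*
 * Illustrative sketch, condensed from ixgbe_setup_transmit_ring() and
 * ixgbe_xmit() below: the rate takes effect through txr->atr_sample,
 * which is loaded from atr_sample_rate, and the transmit path gates
 * Flow Director sampling roughly as
 *
 *	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
 *	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
 *		if (++txr->atr_count >= atr_sample_rate)
 *			ixgbe_atr(txr, m_head);
 *	}
 *
 * so a value of 0 leaves txr->atr_sample at 0 and disables sampling.
 */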
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (adapter->link_active != LINK_STATE_UP) {
140 /*
141 * discard all packets buffered in IFQ to avoid
142 * sending old packets at next link up timing.
143 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
149 if (txr->txr_no_space)
150 return (ENETDOWN);
151
152 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 break;
155
156 IFQ_POLL(&ifp->if_snd, m_head);
157 if (m_head == NULL)
158 break;
159
160 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 break;
162 }
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head, BPF_D_OUT);
171 }
172
173 return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175
176 /************************************************************************
177 * ixgbe_legacy_start
178 *
179 * Called by the stack, this always uses the first tx ring,
180 * and should not be used with multiqueue tx enabled.
181 ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 struct adapter *adapter = ifp->if_softc;
186 struct tx_ring *txr = adapter->tx_rings;
187
188 if (ifp->if_flags & IFF_RUNNING) {
189 IXGBE_TX_LOCK(txr);
190 ixgbe_legacy_start_locked(ifp, txr);
191 IXGBE_TX_UNLOCK(txr);
192 }
193 } /* ixgbe_legacy_start */
194
195 /************************************************************************
196 * ixgbe_mq_start - Multiqueue Transmit Entry Point
197 *
198 * (if_transmit function)
199 ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 struct adapter *adapter = ifp->if_softc;
204 struct tx_ring *txr;
205 int i;
206 #ifdef RSS
207 uint32_t bucket_id;
208 #endif
209
210 /*
211 * When doing RSS, map it to the same outbound queue
212 * as the incoming flow would be mapped to.
213 *
214  * If everything is set up correctly, it should be the
215  * same bucket the current CPU is in (example after this function).
216 */
217 #ifdef RSS
218 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 &bucket_id) == 0)) {
222 i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 if (bucket_id > adapter->num_queues)
225 if_printf(ifp,
226 "bucket_id (%d) > num_queues (%d)\n",
227 bucket_id, adapter->num_queues);
228 #endif
229 } else
230 i = m->m_pkthdr.flowid % adapter->num_queues;
231 } else
232 #endif /* RSS */
233 i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
234
235 /* Check for a hung queue and pick alternative */
236 if (((1 << i) & adapter->active_queues) == 0)
237 i = ffs64(adapter->active_queues);
238
239 txr = &adapter->tx_rings[i];
240
241 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
242 m_freem(m);
243 txr->pcq_drops.ev_count++;
244 return ENOBUFS;
245 }
246 if (IXGBE_TX_TRYLOCK(txr)) {
247 ixgbe_mq_start_locked(ifp, txr);
248 IXGBE_TX_UNLOCK(txr);
249 } else {
250 if (adapter->txrx_use_workqueue) {
251 u_int *enqueued;
252
253 /*
254 * This function itself is not called in interrupt
255  * context, but it can be called in fast softint
256  * context right after receiving forwarded packets.
257  * The workqueue must therefore be protected against being
258  * enqueued twice when the machine handles both locally generated
259  * and forwarded packets (sketch after ixgbe_deferred_mq_start_work()).
260 */
261 enqueued = percpu_getref(adapter->txr_wq_enqueued);
262 if (*enqueued == 0) {
263 *enqueued = 1;
264 percpu_putref(adapter->txr_wq_enqueued);
265 workqueue_enqueue(adapter->txr_wq,
266 &txr->wq_cookie, curcpu());
267 } else
268 percpu_putref(adapter->txr_wq_enqueued);
269 } else
270 softint_schedule(txr->txr_si);
271 }
272
273 return (0);
274 } /* ixgbe_mq_start */
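
/*
 * Illustrative example with hypothetical numbers: on a 12-CPU machine
 * with adapter->num_queues == 8, a packet transmitted from CPU 10 (with
 * no usable RSS hash, or RSS compiled out) selects
 *
 *	i = (10 % 12) % 8 = 2;
 *
 * and if bit 2 of adapter->active_queues happens to be clear (queue
 * flagged as hung), an alternative is picked via
 * ffs64(adapter->active_queues) instead.
 */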
275
276 /************************************************************************
277 * ixgbe_mq_start_locked
278 ************************************************************************/
279 int
280 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
281 {
282 struct mbuf *next;
283 int enqueued = 0, err = 0;
284
285 if (txr->adapter->link_active != LINK_STATE_UP) {
286 /*
287 * discard all packets buffered in txr_interq to avoid
288 * sending old packets at next link up timing.
289 */
290 ixgbe_drain(ifp, txr);
291 return (ENETDOWN);
292 }
293 if ((ifp->if_flags & IFF_RUNNING) == 0)
294 return (ENETDOWN);
295 if (txr->txr_no_space)
296 return (ENETDOWN);
297
298 /* Process the queue */
299 while ((next = pcq_get(txr->txr_interq)) != NULL) {
300 if ((err = ixgbe_xmit(txr, next)) != 0) {
301 m_freem(next);
302 /* All errors are counted in ixgbe_xmit() */
303 break;
304 }
305 enqueued++;
306 #if __FreeBSD_version >= 1100036
307 /*
308 * Since we're looking at the tx ring, we can check
309  * to see if we're a VF by examining our tail register
310 * address.
311 */
312 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
313 (next->m_flags & M_MCAST))
314 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
315 #endif
316 /* Send a copy of the frame to the BPF listener */
317 bpf_mtap(ifp, next, BPF_D_OUT);
318 if ((ifp->if_flags & IFF_RUNNING) == 0)
319 break;
320 }
321
322 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
323 ixgbe_txeof(txr);
324
325 return (err);
326 } /* ixgbe_mq_start_locked */
327
328 /************************************************************************
329 * ixgbe_deferred_mq_start
330 *
331 * Called from a softint and workqueue (indirectly) to drain queued
332 * transmit packets.
333 ************************************************************************/
334 void
335 ixgbe_deferred_mq_start(void *arg)
336 {
337 struct tx_ring *txr = arg;
338 struct adapter *adapter = txr->adapter;
339 struct ifnet *ifp = adapter->ifp;
340
341 IXGBE_TX_LOCK(txr);
342 if (pcq_peek(txr->txr_interq) != NULL)
343 ixgbe_mq_start_locked(ifp, txr);
344 IXGBE_TX_UNLOCK(txr);
345 } /* ixgbe_deferred_mq_start */
346
347 /************************************************************************
348 * ixgbe_deferred_mq_start_work
349 *
350 * Called from a workqueue to drain queued transmit packets.
351 ************************************************************************/
352 void
353 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
354 {
355 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
356 struct adapter *adapter = txr->adapter;
357 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
358 *enqueued = 0;
359 percpu_putref(adapter->txr_wq_enqueued);
360
361 ixgbe_deferred_mq_start(txr);
362 } /* ixgbe_deferred_mq_start_work */
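
/*
 * Sketch of the per-CPU double-enqueue guard, condensed from
 * ixgbe_mq_start() above and this function (not additional logic):
 *
 *	producer:  if (*enqueued == 0) { *enqueued = 1;
 *	               workqueue_enqueue(adapter->txr_wq, ...); }
 *	consumer:  *enqueued = 0;  ixgbe_deferred_mq_start(txr);
 *
 * so at most one deferred-start work item per CPU is pending at a time.
 */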
363
364 /************************************************************************
365 * ixgbe_drain_all
366 ************************************************************************/
367 void
368 ixgbe_drain_all(struct adapter *adapter)
369 {
370 struct ifnet *ifp = adapter->ifp;
371 struct ix_queue *que = adapter->queues;
372
373 for (int i = 0; i < adapter->num_queues; i++, que++) {
374 struct tx_ring *txr = que->txr;
375
376 IXGBE_TX_LOCK(txr);
377 ixgbe_drain(ifp, txr);
378 IXGBE_TX_UNLOCK(txr);
379 }
380 }
381
382 /************************************************************************
383 * ixgbe_xmit
384 *
385 * Maps the mbufs to tx descriptors, allowing the
386 * TX engine to transmit the packets.
387 *
388 * Return 0 on success, positive on failure
389 ************************************************************************/
390 static int
391 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
392 {
393 struct adapter *adapter = txr->adapter;
394 struct ixgbe_tx_buf *txbuf;
395 union ixgbe_adv_tx_desc *txd = NULL;
396 struct ifnet *ifp = adapter->ifp;
397 int i, j, error;
398 int first;
399 u32 olinfo_status = 0, cmd_type_len;
400 bool remap = TRUE;
401 bus_dmamap_t map;
402
403 /* Basic descriptor defines */
404 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
405 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
406
407 if (vlan_has_tag(m_head))
408 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
409
410 /*
411 * Important to capture the first descriptor
412 * used because it will contain the index of
413 * the one we tell the hardware to report back
414 */
415 first = txr->next_avail_desc;
416 txbuf = &txr->tx_buffers[first];
417 map = txbuf->map;
418
419 /*
420 * Map the packet for DMA.
421 */
422 retry:
423 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
424 BUS_DMA_NOWAIT);
425
426 if (__predict_false(error)) {
427 struct mbuf *m;
428
429 switch (error) {
430 case EAGAIN:
431 txr->q_eagain_tx_dma_setup++;
432 return EAGAIN;
433 case ENOMEM:
434 txr->q_enomem_tx_dma_setup++;
435 return EAGAIN;
436 case EFBIG:
437 /* Try it again? - one try */
438 if (remap == TRUE) {
439 remap = FALSE;
440 /*
441 * XXX: m_defrag will choke on
442 * non-MCLBYTES-sized clusters
443 */
444 txr->q_efbig_tx_dma_setup++;
445 m = m_defrag(m_head, M_NOWAIT);
446 if (m == NULL) {
447 txr->q_mbuf_defrag_failed++;
448 return ENOBUFS;
449 }
450 m_head = m;
451 goto retry;
452 } else {
453 txr->q_efbig2_tx_dma_setup++;
454 return error;
455 }
456 case EINVAL:
457 txr->q_einval_tx_dma_setup++;
458 return error;
459 default:
460 txr->q_other_tx_dma_setup++;
461 return error;
462 }
463 }
464
465 /* Make certain there are enough descriptors */
466 if (txr->tx_avail < (map->dm_nsegs + 2)) {
467 txr->txr_no_space = true;
468 txr->no_desc_avail.ev_count++;
469 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
470 return EAGAIN;
471 }
472
473 /*
474 * Set up the appropriate offload context
475 * this will consume the first descriptor
476 */
477 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
478 if (__predict_false(error)) {
479 return (error);
480 }
481
482 /* Do the flow director magic */
483 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
484 (txr->atr_sample) && (!adapter->fdir_reinit)) {
485 ++txr->atr_count;
486 if (txr->atr_count >= atr_sample_rate) {
487 ixgbe_atr(txr, m_head);
488 txr->atr_count = 0;
489 }
490 }
491
492 olinfo_status |= IXGBE_ADVTXD_CC;
493 i = txr->next_avail_desc;
494 for (j = 0; j < map->dm_nsegs; j++) {
495 bus_size_t seglen;
496 bus_addr_t segaddr;
497
498 txbuf = &txr->tx_buffers[i];
499 txd = &txr->tx_base[i];
500 seglen = map->dm_segs[j].ds_len;
501 segaddr = htole64(map->dm_segs[j].ds_addr);
502
503 txd->read.buffer_addr = segaddr;
504 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
505 txd->read.olinfo_status = htole32(olinfo_status);
506
507 if (++i == txr->num_desc)
508 i = 0;
509 }
510
511 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
512 txr->tx_avail -= map->dm_nsegs;
513 txr->next_avail_desc = i;
514
515 txbuf->m_head = m_head;
516 /*
517  * Here we swap the maps so the last descriptor,
518  * which gets the completion interrupt, has the
519  * real (loaded) map, and the first descriptor gets
520  * the last buffer's unused map (illustrated after this function).
521 */
522 txr->tx_buffers[first].map = txbuf->map;
523 txbuf->map = map;
524 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
525 BUS_DMASYNC_PREWRITE);
526
527 /* Set the EOP descriptor that will be marked done */
528 txbuf = &txr->tx_buffers[first];
529 txbuf->eop = txd;
530
531 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
532 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
533 /*
534  * Advance the Transmit Descriptor Tail (TDT); this tells the
535 * hardware that this frame is available to transmit.
536 */
537 ++txr->total_packets.ev_count;
538 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
539
540 /*
541 * XXXX NOMPSAFE: ifp->if_data should be percpu.
542 */
543 ifp->if_obytes += m_head->m_pkthdr.len;
544 if (m_head->m_flags & M_MCAST)
545 ifp->if_omcasts++;
546
547 /* Mark queue as having work */
548 if (txr->busy == 0)
549 txr->busy = 1;
550
551 return (0);
552 } /* ixgbe_xmit */
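
/*
 * Illustration of the map swap above, for a packet occupying
 * descriptors first..last (condensed; not additional driver logic):
 *
 *	tx_buffers[first]: keeps ->eop, so ixgbe_txeof() can test the
 *	                   DD bit of the last descriptor, plus the
 *	                   last buffer's (unused) dmamap.
 *	tx_buffers[last] : keeps ->m_head and the dmamap that was
 *	                   actually loaded, so the completion path can
 *	                   sync, unload and m_freem() in one place.
 */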
553
554 /************************************************************************
555 * ixgbe_drain
556 ************************************************************************/
557 static void
558 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
559 {
560 struct mbuf *m;
561
562 IXGBE_TX_LOCK_ASSERT(txr);
563
564 if (txr->me == 0) {
565 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
566 IFQ_DEQUEUE(&ifp->if_snd, m);
567 m_freem(m);
568 IF_DROP(&ifp->if_snd);
569 }
570 }
571
572 while ((m = pcq_get(txr->txr_interq)) != NULL) {
573 m_freem(m);
574 txr->pcq_drops.ev_count++;
575 }
576 }
577
578 /************************************************************************
579 * ixgbe_allocate_transmit_buffers
580 *
581 * Allocate memory for tx_buffer structures. The tx_buffer stores all
582 * the information needed to transmit a packet on the wire. This is
583  * called only once at attach; setup is done on every reset.
584 ************************************************************************/
585 static int
586 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
587 {
588 struct adapter *adapter = txr->adapter;
589 device_t dev = adapter->dev;
590 struct ixgbe_tx_buf *txbuf;
591 int error, i;
592
593 /*
594 * Setup DMA descriptor areas.
595 */
596 error = ixgbe_dma_tag_create(
597 /* parent */ adapter->osdep.dmat,
598 /* alignment */ 1,
599 /* bounds */ 0,
600 /* maxsize */ IXGBE_TSO_SIZE,
601 /* nsegments */ adapter->num_segs,
602 /* maxsegsize */ PAGE_SIZE,
603 /* flags */ 0,
604 &txr->txtag);
605 if (error != 0) {
606 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
607 goto fail;
608 }
609
610 txr->tx_buffers =
611 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
612 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
613 if (txr->tx_buffers == NULL) {
614 aprint_error_dev(dev, "Unable to allocate tx_buffer memory\n");
615 error = ENOMEM;
616 goto fail;
617 }
618
619 /* Create the descriptor buffer dma maps */
620 txbuf = txr->tx_buffers;
621 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
622 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
623 if (error != 0) {
624 aprint_error_dev(dev,
625 "Unable to create TX DMA map (%d)\n", error);
626 goto fail;
627 }
628 }
629
630 return 0;
631 fail:
632 /* We free all; this handles the case where we failed partway through */
633 #if 0 /* XXX was FreeBSD */
634 ixgbe_free_transmit_structures(adapter);
635 #else
636 ixgbe_free_transmit_buffers(txr);
637 #endif
638 return (error);
639 } /* ixgbe_allocate_transmit_buffers */
640
641 /************************************************************************
642 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
643 ************************************************************************/
644 static void
645 ixgbe_setup_transmit_ring(struct tx_ring *txr)
646 {
647 struct adapter *adapter = txr->adapter;
648 struct ixgbe_tx_buf *txbuf;
649 #ifdef DEV_NETMAP
650 struct netmap_adapter *na = NA(adapter->ifp);
651 struct netmap_slot *slot;
652 #endif /* DEV_NETMAP */
653
654 /* Clear the old ring contents */
655 IXGBE_TX_LOCK(txr);
656
657 #ifdef DEV_NETMAP
658 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
659 /*
660 * (under lock): if in netmap mode, do some consistency
661 * checks and set slot to entry 0 of the netmap ring.
662 */
663 slot = netmap_reset(na, NR_TX, txr->me, 0);
664 }
665 #endif /* DEV_NETMAP */
666
667 bzero((void *)txr->tx_base,
668 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
669 /* Reset indices */
670 txr->next_avail_desc = 0;
671 txr->next_to_clean = 0;
672
673 /* Free any existing tx buffers. */
674 txbuf = txr->tx_buffers;
675 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
676 if (txbuf->m_head != NULL) {
677 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
678 0, txbuf->m_head->m_pkthdr.len,
679 BUS_DMASYNC_POSTWRITE);
680 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
681 m_freem(txbuf->m_head);
682 txbuf->m_head = NULL;
683 }
684
685 #ifdef DEV_NETMAP
686 /*
687 * In netmap mode, set the map for the packet buffer.
688 * NOTE: Some drivers (not this one) also need to set
689 * the physical buffer address in the NIC ring.
690 * Slots in the netmap ring (indexed by "si") are
691 * kring->nkr_hwofs positions "ahead" wrt the
692 * corresponding slot in the NIC ring. In some drivers
693 * (not here) nkr_hwofs can be negative. Function
694 * netmap_idx_n2k() handles wraparounds properly.
695 */
696 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
697 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
698 netmap_load_map(na, txr->txtag,
699 txbuf->map, NMB(na, slot + si));
700 }
701 #endif /* DEV_NETMAP */
702
703 /* Clear the EOP descriptor pointer */
704 txbuf->eop = NULL;
705 }
706
707 /* Set the rate at which we sample packets */
708 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
709 txr->atr_sample = atr_sample_rate;
710
711 /* Set number of descriptors available */
712 txr->tx_avail = adapter->num_tx_desc;
713
714 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
715 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
716 IXGBE_TX_UNLOCK(txr);
717 } /* ixgbe_setup_transmit_ring */
718
719 /************************************************************************
720 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
721 ************************************************************************/
722 int
723 ixgbe_setup_transmit_structures(struct adapter *adapter)
724 {
725 struct tx_ring *txr = adapter->tx_rings;
726
727 for (int i = 0; i < adapter->num_queues; i++, txr++)
728 ixgbe_setup_transmit_ring(txr);
729
730 return (0);
731 } /* ixgbe_setup_transmit_structures */
732
733 /************************************************************************
734 * ixgbe_free_transmit_structures - Free all transmit rings.
735 ************************************************************************/
736 void
737 ixgbe_free_transmit_structures(struct adapter *adapter)
738 {
739 struct tx_ring *txr = adapter->tx_rings;
740
741 for (int i = 0; i < adapter->num_queues; i++, txr++) {
742 ixgbe_free_transmit_buffers(txr);
743 ixgbe_dma_free(adapter, &txr->txdma);
744 IXGBE_TX_LOCK_DESTROY(txr);
745 }
746 free(adapter->tx_rings, M_DEVBUF);
747 } /* ixgbe_free_transmit_structures */
748
749 /************************************************************************
750 * ixgbe_free_transmit_buffers
751 *
752 * Free transmit ring related data structures.
753 ************************************************************************/
754 static void
755 ixgbe_free_transmit_buffers(struct tx_ring *txr)
756 {
757 struct adapter *adapter = txr->adapter;
758 struct ixgbe_tx_buf *tx_buffer;
759 int i;
760
761 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
762
763 if (txr->tx_buffers == NULL)
764 return;
765
766 tx_buffer = txr->tx_buffers;
767 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
768 if (tx_buffer->m_head != NULL) {
769 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
770 0, tx_buffer->m_head->m_pkthdr.len,
771 BUS_DMASYNC_POSTWRITE);
772 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
773 m_freem(tx_buffer->m_head);
774 tx_buffer->m_head = NULL;
775 if (tx_buffer->map != NULL) {
776 ixgbe_dmamap_destroy(txr->txtag,
777 tx_buffer->map);
778 tx_buffer->map = NULL;
779 }
780 } else if (tx_buffer->map != NULL) {
781 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
782 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
783 tx_buffer->map = NULL;
784 }
785 }
786 if (txr->txr_interq != NULL) {
787 struct mbuf *m;
788
789 while ((m = pcq_get(txr->txr_interq)) != NULL)
790 m_freem(m);
791 pcq_destroy(txr->txr_interq);
792 }
793 if (txr->tx_buffers != NULL) {
794 free(txr->tx_buffers, M_DEVBUF);
795 txr->tx_buffers = NULL;
796 }
797 if (txr->txtag != NULL) {
798 ixgbe_dma_tag_destroy(txr->txtag);
799 txr->txtag = NULL;
800 }
801 } /* ixgbe_free_transmit_buffers */
802
803 /************************************************************************
804 * ixgbe_tx_ctx_setup
805 *
806 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
807 ************************************************************************/
808 static int
809 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
810 u32 *cmd_type_len, u32 *olinfo_status)
811 {
812 struct adapter *adapter = txr->adapter;
813 struct ixgbe_adv_tx_context_desc *TXD;
814 struct ether_vlan_header *eh;
815 #ifdef INET
816 struct ip *ip;
817 #endif
818 #ifdef INET6
819 struct ip6_hdr *ip6;
820 #endif
821 int ehdrlen, ip_hlen = 0;
822 int offload = TRUE;
823 int ctxd = txr->next_avail_desc;
824 u32 vlan_macip_lens = 0;
825 u32 type_tucmd_mlhl = 0;
826 u16 vtag = 0;
827 u16 etype;
828 u8 ipproto = 0;
829 char *l3d;
830
831
832 /* First check if TSO is to be used */
833 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
834 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
835
836 if (rv != 0)
837 ++adapter->tso_err.ev_count;
838 return rv;
839 }
840
841 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
842 offload = FALSE;
843
844 /* Indicate the whole packet as payload when not doing TSO */
845 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
846
847 /* Now ready a context descriptor */
848 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
849
850 /*
851 * In advanced descriptors the vlan tag must
852 * be placed into the context descriptor. Hence
853 * we need to make one even if not doing offloads.
854 */
855 if (vlan_has_tag(mp)) {
856 vtag = htole16(vlan_get_tag(mp));
857 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
858 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
859 (offload == FALSE))
860 return (0);
861
862 /*
863 * Determine where frame payload starts.
864 * Jump over vlan headers if already present,
865 * helpful for QinQ too.
866 */
867 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
868 eh = mtod(mp, struct ether_vlan_header *);
869 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
870 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
871 etype = ntohs(eh->evl_proto);
872 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
873 } else {
874 etype = ntohs(eh->evl_encap_proto);
875 ehdrlen = ETHER_HDR_LEN;
876 }
877
878 /* Set the ether header length */
879 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
880
881 if (offload == FALSE)
882 goto no_offloads;
883
884 /*
885 * If the first mbuf only includes the ethernet header,
886 * jump to the next one
887 * XXX: This assumes the stack splits mbufs containing headers
888 * on header boundaries
889 * XXX: And assumes the entire IP header is contained in one mbuf
890 */
891 if (mp->m_len == ehdrlen && mp->m_next)
892 l3d = mtod(mp->m_next, char *);
893 else
894 l3d = mtod(mp, char *) + ehdrlen;
895
896 switch (etype) {
897 #ifdef INET
898 case ETHERTYPE_IP:
899 ip = (struct ip *)(l3d);
900 ip_hlen = ip->ip_hl << 2;
901 ipproto = ip->ip_p;
902 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
903 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
904 ip->ip_sum == 0);
905 break;
906 #endif
907 #ifdef INET6
908 case ETHERTYPE_IPV6:
909 ip6 = (struct ip6_hdr *)(l3d);
910 ip_hlen = sizeof(struct ip6_hdr);
911 ipproto = ip6->ip6_nxt;
912 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
913 break;
914 #endif
915 default:
916 offload = false;
917 break;
918 }
919
920 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
921 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
922
923 vlan_macip_lens |= ip_hlen;
924
925 /* No support for offloads for non-L4 next headers */
926 switch (ipproto) {
927 case IPPROTO_TCP:
928 if (mp->m_pkthdr.csum_flags &
929 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
930 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
931 else
932 offload = false;
933 break;
934 case IPPROTO_UDP:
935 if (mp->m_pkthdr.csum_flags &
936 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
937 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
938 else
939 offload = false;
940 break;
941 default:
942 offload = false;
943 break;
944 }
945
946 if (offload) /* Insert L4 checksum into data descriptors */
947 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
948
949 no_offloads:
950 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
951
952 /* Now copy bits into descriptor */
953 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
954 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
955 TXD->seqnum_seed = htole32(0);
956 TXD->mss_l4len_idx = htole32(0);
957
958 /* We've consumed the first desc, adjust counters */
959 if (++ctxd == txr->num_desc)
960 ctxd = 0;
961 txr->next_avail_desc = ctxd;
962 --txr->tx_avail;
963
964 return (0);
965 } /* ixgbe_tx_ctx_setup */
966
967 /************************************************************************
968 * ixgbe_tso_setup
969 *
970 * Setup work for hardware segmentation offload (TSO) on
971 * adapters using advanced tx descriptors
972 ************************************************************************/
973 static int
974 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
975 u32 *olinfo_status)
976 {
977 struct ixgbe_adv_tx_context_desc *TXD;
978 struct ether_vlan_header *eh;
979 #ifdef INET6
980 struct ip6_hdr *ip6;
981 #endif
982 #ifdef INET
983 struct ip *ip;
984 #endif
985 struct tcphdr *th;
986 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
987 u32 vlan_macip_lens = 0;
988 u32 type_tucmd_mlhl = 0;
989 u32 mss_l4len_idx = 0, paylen;
990 u16 vtag = 0, eh_type;
991
992 /*
993 * Determine where frame payload starts.
994 * Jump over vlan headers if already present
995 */
996 eh = mtod(mp, struct ether_vlan_header *);
997 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
998 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
999 eh_type = eh->evl_proto;
1000 } else {
1001 ehdrlen = ETHER_HDR_LEN;
1002 eh_type = eh->evl_encap_proto;
1003 }
1004
1005 switch (ntohs(eh_type)) {
1006 #ifdef INET
1007 case ETHERTYPE_IP:
1008 ip = (struct ip *)(mp->m_data + ehdrlen);
1009 if (ip->ip_p != IPPROTO_TCP)
1010 return (ENXIO);
1011 ip->ip_sum = 0;
1012 ip_hlen = ip->ip_hl << 2;
1013 th = (struct tcphdr *)((char *)ip + ip_hlen);
1014 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1015 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1016 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1017 /* Tell transmit desc to also do IPv4 checksum. */
1018 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1019 break;
1020 #endif
1021 #ifdef INET6
1022 case ETHERTYPE_IPV6:
1023 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1024 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1025 if (ip6->ip6_nxt != IPPROTO_TCP)
1026 return (ENXIO);
1027 ip_hlen = sizeof(struct ip6_hdr);
1028 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1029 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1030 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1031 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1032 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1033 break;
1034 #endif
1035 default:
1036 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1037 __func__, ntohs(eh_type));
1038 break;
1039 }
1040
1041 ctxd = txr->next_avail_desc;
1042 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1043
1044 tcp_hlen = th->th_off << 2;
1045
1046 /* This is used in the transmit desc in encap */
1047 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1048
1049 /* VLAN MACLEN IPLEN */
1050 if (vlan_has_tag(mp)) {
1051 vtag = htole16(vlan_get_tag(mp));
1052 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1053 }
1054
1055 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1056 vlan_macip_lens |= ip_hlen;
1057 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1058
1059 /* ADV DTYPE TUCMD */
1060 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1061 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1062 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1063
1064 /* MSS L4LEN IDX */
1065 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1066 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1067 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1068
1069 TXD->seqnum_seed = htole32(0);
1070
1071 if (++ctxd == txr->num_desc)
1072 ctxd = 0;
1073
1074 txr->tx_avail--;
1075 txr->next_avail_desc = ctxd;
1076 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1077 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1078 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1079 ++txr->tso_tx.ev_count;
1080
1081 return (0);
1082 } /* ixgbe_tso_setup */
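
/*
 * Worked example (hypothetical frame, shifts kept symbolic): for an
 * untagged IPv4/TCP TSO packet with a 14-byte Ethernet header, 20-byte
 * IP header, 20-byte TCP header and segsz of 1448, the fields built
 * above reduce to
 *
 *	vlan_macip_lens = (14 << IXGBE_ADVTXD_MACLEN_SHIFT) | 20;
 *	mss_l4len_idx   = (1448 << IXGBE_ADVTXD_MSS_SHIFT) |
 *	                  (20 << IXGBE_ADVTXD_L4LEN_SHIFT);
 *	paylen          = m_pkthdr.len - 14 - 20 - 20;
 */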
1083
1084
1085 /************************************************************************
1086 * ixgbe_txeof
1087 *
1088 * Examine each tx_buffer in the used queue. If the hardware is done
1089 * processing the packet then free associated resources. The
1090 * tx_buffer is put back on the free queue.
1091 ************************************************************************/
1092 bool
1093 ixgbe_txeof(struct tx_ring *txr)
1094 {
1095 struct adapter *adapter = txr->adapter;
1096 struct ifnet *ifp = adapter->ifp;
1097 struct ixgbe_tx_buf *buf;
1098 union ixgbe_adv_tx_desc *txd;
1099 u32 work, processed = 0;
1100 u32 limit = adapter->tx_process_limit;
1101
1102 KASSERT(mutex_owned(&txr->tx_mtx));
1103
1104 #ifdef DEV_NETMAP
1105 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1106 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1107 struct netmap_adapter *na = NA(adapter->ifp);
1108 struct netmap_kring *kring = &na->tx_rings[txr->me];
1109 txd = txr->tx_base;
1110 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1111 BUS_DMASYNC_POSTREAD);
1112 /*
1113 * In netmap mode, all the work is done in the context
1114 * of the client thread. Interrupt handlers only wake up
1115 * clients, which may be sleeping on individual rings
1116 * or on a global resource for all rings.
1117 * To implement tx interrupt mitigation, we wake up the client
1118 * thread roughly every half ring, even if the NIC interrupts
1119 * more frequently. This is implemented as follows:
1120 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1121 * the slot that should wake up the thread (nkr_num_slots
1122 * means the user thread should not be woken up);
1123 * - the driver ignores tx interrupts unless netmap_mitigate=0
1124 * or the slot has the DD bit set.
1125 */
1126 if (!netmap_mitigate ||
1127 (kring->nr_kflags < kring->nkr_num_slots &&
1128 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1129 netmap_tx_irq(ifp, txr->me);
1130 }
1131 return false;
1132 }
1133 #endif /* DEV_NETMAP */
1134
1135 if (txr->tx_avail == txr->num_desc) {
1136 txr->busy = 0;
1137 return false;
1138 }
1139
1140 /* Get work starting point */
1141 work = txr->next_to_clean;
1142 buf = &txr->tx_buffers[work];
1143 txd = &txr->tx_base[work];
1144 work -= txr->num_desc; /* The distance to ring end */
1145 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1146 BUS_DMASYNC_POSTREAD);
1147
1148 do {
1149 union ixgbe_adv_tx_desc *eop = buf->eop;
1150 if (eop == NULL) /* No work */
1151 break;
1152
1153 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1154 break; /* I/O not complete */
1155
1156 if (buf->m_head) {
1157 txr->bytes += buf->m_head->m_pkthdr.len;
1158 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1159 0, buf->m_head->m_pkthdr.len,
1160 BUS_DMASYNC_POSTWRITE);
1161 ixgbe_dmamap_unload(txr->txtag, buf->map);
1162 m_freem(buf->m_head);
1163 buf->m_head = NULL;
1164 }
1165 buf->eop = NULL;
1166 txr->txr_no_space = false;
1167 ++txr->tx_avail;
1168
1169 /* We clean the range if multi segment */
1170 while (txd != eop) {
1171 ++txd;
1172 ++buf;
1173 ++work;
1174 /* wrap the ring? */
1175 if (__predict_false(!work)) {
1176 work -= txr->num_desc;
1177 buf = txr->tx_buffers;
1178 txd = txr->tx_base;
1179 }
1180 if (buf->m_head) {
1181 txr->bytes +=
1182 buf->m_head->m_pkthdr.len;
1183 bus_dmamap_sync(txr->txtag->dt_dmat,
1184 buf->map,
1185 0, buf->m_head->m_pkthdr.len,
1186 BUS_DMASYNC_POSTWRITE);
1187 ixgbe_dmamap_unload(txr->txtag,
1188 buf->map);
1189 m_freem(buf->m_head);
1190 buf->m_head = NULL;
1191 }
1192 ++txr->tx_avail;
1193 buf->eop = NULL;
1194
1195 }
1196 ++txr->packets;
1197 ++processed;
1198 ++ifp->if_opackets;
1199
1200 /* Try the next packet */
1201 ++txd;
1202 ++buf;
1203 ++work;
1204 /* reset with a wrap */
1205 if (__predict_false(!work)) {
1206 work -= txr->num_desc;
1207 buf = txr->tx_buffers;
1208 txd = txr->tx_base;
1209 }
1210 prefetch(txd);
1211 } while (__predict_true(--limit));
1212
1213 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1214 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1215
1216 work += txr->num_desc;
1217 txr->next_to_clean = work;
1218
1219 /*
1220  * Queue hang detection: we know there is
1221  * work outstanding or the early return above
1222  * would have been taken, so increment busy
1223  * if nothing managed to get cleaned; the
1224  * local timer then checks busy and marks the
1225  * queue HUNG if it exceeds a MAX number of attempts.
1226 */
1227 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1228 ++txr->busy;
1229 /*
1230  * If anything gets cleaned we reset state to 1;
1231  * note this will turn off HUNG if it is set.
1232 */
1233 if (processed)
1234 txr->busy = 1;
1235
1236 if (txr->tx_avail == txr->num_desc)
1237 txr->busy = 0;
1238
1239 return ((limit > 0) ? false : true);
1240 } /* ixgbe_txeof */
1241
1242 /************************************************************************
1243 * ixgbe_rsc_count
1244 *
1245 * Used to detect a descriptor that has been merged by Hardware RSC.
1246 ************************************************************************/
1247 static inline u32
1248 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1249 {
1250 return (le32toh(rx->wb.lower.lo_dword.data) &
1251 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1252 } /* ixgbe_rsc_count */
1253
1254 /************************************************************************
1255 * ixgbe_setup_hw_rsc
1256 *
1257 * Initialize Hardware RSC (LRO) feature on 82599
1258  * for an RX ring; this is toggled by the LRO capability
1259 * even though it is transparent to the stack.
1260 *
1261 * NOTE: Since this HW feature only works with IPv4 and
1262 * testing has shown soft LRO to be as effective,
1263 * this feature will be disabled by default.
1264 ************************************************************************/
1265 static void
1266 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1267 {
1268 struct adapter *adapter = rxr->adapter;
1269 struct ixgbe_hw *hw = &adapter->hw;
1270 u32 rscctrl, rdrxctl;
1271
1272 /* If turning LRO/RSC off we need to disable it */
1273 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1274 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1275 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1276 return;
1277 }
1278
1279 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1280 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1281 #ifdef DEV_NETMAP
1282 /* Always strip CRC unless Netmap disabled it */
1283 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1284 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1285 ix_crcstrip)
1286 #endif /* DEV_NETMAP */
1287 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1288 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1289 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1290
1291 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1292 rscctrl |= IXGBE_RSCCTL_RSCEN;
1293 /*
1294 * Limit the total number of descriptors that
1295 * can be combined, so it does not exceed 64K
1296 */
1297 if (rxr->mbuf_sz == MCLBYTES)
1298 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1299 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1300 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1301 else if (rxr->mbuf_sz == MJUM9BYTES)
1302 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1303 else /* Using 16K cluster */
1304 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1305
1306 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1307
1308 /* Enable TCP header recognition */
1309 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1310 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1311
1312 /* Disable RSC for ACK packets */
1313 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1314 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1315
1316 rxr->hw_rsc = TRUE;
1317 } /* ixgbe_setup_hw_rsc */
1318
1319 /************************************************************************
1320 * ixgbe_refresh_mbufs
1321 *
1322 * Refresh mbuf buffers for RX descriptor rings
1323  * - now keeps its own state so discards due to resource
1324  * exhaustion are unnecessary; if an mbuf cannot be obtained
1325  * it just returns, keeping its placeholder, so it can simply
1326  * be called again later to retry (worked example after this function).
1327 ************************************************************************/
1328 static void
1329 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1330 {
1331 struct adapter *adapter = rxr->adapter;
1332 struct ixgbe_rx_buf *rxbuf;
1333 struct mbuf *mp;
1334 int i, j, error;
1335 bool refreshed = false;
1336
1337 i = j = rxr->next_to_refresh;
1338 /* Control the loop with one beyond */
1339 if (++j == rxr->num_desc)
1340 j = 0;
1341
1342 while (j != limit) {
1343 rxbuf = &rxr->rx_buffers[i];
1344 if (rxbuf->buf == NULL) {
1345 mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1346 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1347 if (mp == NULL) {
1348 rxr->no_jmbuf.ev_count++;
1349 goto update;
1350 }
1351 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1352 m_adj(mp, ETHER_ALIGN);
1353 } else
1354 mp = rxbuf->buf;
1355
1356 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1357
1358 /* If we're dealing with an mbuf that was copied rather
1359 * than replaced, there's no need to go through busdma.
1360 */
1361 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1362 /* Get the memory mapping */
1363 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1364 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1365 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1366 if (error != 0) {
1367 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1368 m_free(mp);
1369 rxbuf->buf = NULL;
1370 goto update;
1371 }
1372 rxbuf->buf = mp;
1373 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1374 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1375 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1376 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1377 } else {
1378 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1379 rxbuf->flags &= ~IXGBE_RX_COPY;
1380 }
1381
1382 refreshed = true;
1383 /* Next is precalculated */
1384 i = j;
1385 rxr->next_to_refresh = i;
1386 if (++j == rxr->num_desc)
1387 j = 0;
1388 }
1389
1390 update:
1391 if (refreshed) /* Update hardware tail index */
1392 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1393
1394 return;
1395 } /* ixgbe_refresh_mbufs */
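
/*
 * Worked example of the "one beyond" loop control above, with
 * hypothetical indices: num_desc = 4, next_to_refresh = 1, limit = 0.
 * i/j start as 1/2; buffers 1 and 2 are refreshed, next_to_refresh
 * ends at 3 (the next slot to fill) and is what gets written to
 * rxr->tail, and the loop stops when j reaches the caller's limit,
 * so refreshing never runs onto descriptors the caller still owns.
 */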
1396
1397 /************************************************************************
1398 * ixgbe_allocate_receive_buffers
1399 *
1400 * Allocate memory for rx_buffer structures. Since we use one
1401 * rx_buffer per received packet, the maximum number of rx_buffer's
1402 * that we'll need is equal to the number of receive descriptors
1403 * that we've allocated.
1404 ************************************************************************/
1405 static int
1406 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1407 {
1408 struct adapter *adapter = rxr->adapter;
1409 device_t dev = adapter->dev;
1410 struct ixgbe_rx_buf *rxbuf;
1411 int bsize, error;
1412
1413 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1414 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1415 M_NOWAIT | M_ZERO);
1416 if (rxr->rx_buffers == NULL) {
1417 aprint_error_dev(dev, "Unable to allocate rx_buffer memory\n");
1418 error = ENOMEM;
1419 goto fail;
1420 }
1421
1422 error = ixgbe_dma_tag_create(
1423 /* parent */ adapter->osdep.dmat,
1424 /* alignment */ 1,
1425 /* bounds */ 0,
1426 /* maxsize */ MJUM16BYTES,
1427 /* nsegments */ 1,
1428 /* maxsegsize */ MJUM16BYTES,
1429 /* flags */ 0,
1430 &rxr->ptag);
1431 if (error != 0) {
1432 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1433 goto fail;
1434 }
1435
1436 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1437 rxbuf = &rxr->rx_buffers[i];
1438 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1439 if (error) {
1440 aprint_error_dev(dev, "Unable to create RX dma map\n");
1441 goto fail;
1442 }
1443 }
1444
1445 return (0);
1446
1447 fail:
1448 /* Frees all, but can handle partial completion */
1449 ixgbe_free_receive_structures(adapter);
1450
1451 return (error);
1452 } /* ixgbe_allocate_receive_buffers */
1453
1454 /************************************************************************
1455 * ixgbe_free_receive_ring
1456 ************************************************************************/
1457 static void
1458 ixgbe_free_receive_ring(struct rx_ring *rxr)
1459 {
1460 for (int i = 0; i < rxr->num_desc; i++) {
1461 ixgbe_rx_discard(rxr, i);
1462 }
1463 } /* ixgbe_free_receive_ring */
1464
1465 /************************************************************************
1466 * ixgbe_setup_receive_ring
1467 *
1468 * Initialize a receive ring and its buffers.
1469 ************************************************************************/
1470 static int
1471 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1472 {
1473 struct adapter *adapter;
1474 struct ixgbe_rx_buf *rxbuf;
1475 #ifdef LRO
1476 struct ifnet *ifp;
1477 struct lro_ctrl *lro = &rxr->lro;
1478 #endif /* LRO */
1479 #ifdef DEV_NETMAP
1480 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1481 struct netmap_slot *slot;
1482 #endif /* DEV_NETMAP */
1483 int rsize, error = 0;
1484
1485 adapter = rxr->adapter;
1486 #ifdef LRO
1487 ifp = adapter->ifp;
1488 #endif /* LRO */
1489
1490 /* Clear the ring contents */
1491 IXGBE_RX_LOCK(rxr);
1492
1493 #ifdef DEV_NETMAP
1494 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1495 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1496 #endif /* DEV_NETMAP */
1497
1498 rsize = roundup2(adapter->num_rx_desc *
1499 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1500 bzero((void *)rxr->rx_base, rsize);
1501 /* Cache the size */
1502 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1503
1504 /* Free current RX buffer structs and their mbufs */
1505 ixgbe_free_receive_ring(rxr);
1506
1507 IXGBE_RX_UNLOCK(rxr);
1508 /*
1509 * Now reinitialize our supply of jumbo mbufs. The number
1510 * or size of jumbo mbufs may have changed.
1511  * Assume all rxr->ptag values are the same.
1512 */
1513 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1514 (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1515
1516 IXGBE_RX_LOCK(rxr);
1517
1518 /* Now replenish the mbufs */
1519 for (int j = 0; j != rxr->num_desc; ++j) {
1520 struct mbuf *mp;
1521
1522 rxbuf = &rxr->rx_buffers[j];
1523
1524 #ifdef DEV_NETMAP
1525 /*
1526 * In netmap mode, fill the map and set the buffer
1527 * address in the NIC ring, considering the offset
1528 * between the netmap and NIC rings (see comment in
1529 * ixgbe_setup_transmit_ring() ). No need to allocate
1530 * an mbuf, so end the block with a continue;
1531 */
1532 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1533 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1534 uint64_t paddr;
1535 void *addr;
1536
1537 addr = PNMB(na, slot + sj, &paddr);
1538 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1539 /* Update descriptor and the cached value */
1540 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1541 rxbuf->addr = htole64(paddr);
1542 continue;
1543 }
1544 #endif /* DEV_NETMAP */
1545
1546 rxbuf->flags = 0;
1547 rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1548 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1549 if (rxbuf->buf == NULL) {
1550 error = ENOBUFS;
1551 goto fail;
1552 }
1553 mp = rxbuf->buf;
1554 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1555 /* Get the memory mapping */
1556 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1557 mp, BUS_DMA_NOWAIT);
1558 if (error != 0)
1559 goto fail;
1560 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1561 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1562 /* Update the descriptor and the cached value */
1563 rxr->rx_base[j].read.pkt_addr =
1564 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1565 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1566 }
1567
1568
1569 /* Setup our descriptor indices */
1570 rxr->next_to_check = 0;
1571 rxr->next_to_refresh = 0;
1572 rxr->lro_enabled = FALSE;
1573 rxr->rx_copies.ev_count = 0;
1574 #if 0 /* NetBSD */
1575 rxr->rx_bytes.ev_count = 0;
1576 #if 1 /* Fix inconsistency */
1577 rxr->rx_packets.ev_count = 0;
1578 #endif
1579 #endif
1580 rxr->vtag_strip = FALSE;
1581
1582 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1583 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1584
1585 /*
1586 * Now set up the LRO interface
1587 */
1588 if (ixgbe_rsc_enable)
1589 ixgbe_setup_hw_rsc(rxr);
1590 #ifdef LRO
1591 else if (ifp->if_capenable & IFCAP_LRO) {
1592 device_t dev = adapter->dev;
1593 int err = tcp_lro_init(lro);
1594 if (err) {
1595 device_printf(dev, "LRO Initialization failed!\n");
1596 goto fail;
1597 }
1598 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1599 rxr->lro_enabled = TRUE;
1600 lro->ifp = adapter->ifp;
1601 }
1602 #endif /* LRO */
1603
1604 IXGBE_RX_UNLOCK(rxr);
1605
1606 return (0);
1607
1608 fail:
1609 ixgbe_free_receive_ring(rxr);
1610 IXGBE_RX_UNLOCK(rxr);
1611
1612 return (error);
1613 } /* ixgbe_setup_receive_ring */
1614
1615 /************************************************************************
1616 * ixgbe_setup_receive_structures - Initialize all receive rings.
1617 ************************************************************************/
1618 int
1619 ixgbe_setup_receive_structures(struct adapter *adapter)
1620 {
1621 struct rx_ring *rxr = adapter->rx_rings;
1622 int j;
1623
1624 for (j = 0; j < adapter->num_queues; j++, rxr++)
1625 if (ixgbe_setup_receive_ring(rxr))
1626 goto fail;
1627
1628 return (0);
1629 fail:
1630 /*
1631  * Free RX buffers allocated so far; we will only handle
1632  * the rings that completed, since the failing case will have
1633  * cleaned up after itself. 'j' failed, so it is the terminus.
1634 */
1635 for (int i = 0; i < j; ++i) {
1636 rxr = &adapter->rx_rings[i];
1637 IXGBE_RX_LOCK(rxr);
1638 ixgbe_free_receive_ring(rxr);
1639 IXGBE_RX_UNLOCK(rxr);
1640 }
1641
1642 return (ENOBUFS);
1643 } /* ixgbe_setup_receive_structures */
1644
1645
1646 /************************************************************************
1647 * ixgbe_free_receive_structures - Free all receive rings.
1648 ************************************************************************/
1649 void
1650 ixgbe_free_receive_structures(struct adapter *adapter)
1651 {
1652 struct rx_ring *rxr = adapter->rx_rings;
1653
1654 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1655
1656 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1657 ixgbe_free_receive_buffers(rxr);
1658 #ifdef LRO
1659 /* Free LRO memory */
1660 tcp_lro_free(&rxr->lro);
1661 #endif /* LRO */
1662 /* Free the ring memory as well */
1663 ixgbe_dma_free(adapter, &rxr->rxdma);
1664 IXGBE_RX_LOCK_DESTROY(rxr);
1665 }
1666
1667 free(adapter->rx_rings, M_DEVBUF);
1668 } /* ixgbe_free_receive_structures */
1669
1670
1671 /************************************************************************
1672 * ixgbe_free_receive_buffers - Free receive ring data structures
1673 ************************************************************************/
1674 static void
1675 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1676 {
1677 struct adapter *adapter = rxr->adapter;
1678 struct ixgbe_rx_buf *rxbuf;
1679
1680 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1681
1682 /* Cleanup any existing buffers */
1683 if (rxr->rx_buffers != NULL) {
1684 for (int i = 0; i < adapter->num_rx_desc; i++) {
1685 rxbuf = &rxr->rx_buffers[i];
1686 ixgbe_rx_discard(rxr, i);
1687 if (rxbuf->pmap != NULL) {
1688 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1689 rxbuf->pmap = NULL;
1690 }
1691 }
1692 if (rxr->rx_buffers != NULL) {
1693 free(rxr->rx_buffers, M_DEVBUF);
1694 rxr->rx_buffers = NULL;
1695 }
1696 }
1697
1698 if (rxr->ptag != NULL) {
1699 ixgbe_dma_tag_destroy(rxr->ptag);
1700 rxr->ptag = NULL;
1701 }
1702
1703 return;
1704 } /* ixgbe_free_receive_buffers */
1705
1706 /************************************************************************
1707 * ixgbe_rx_input
1708 ************************************************************************/
1709 static __inline void
1710 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1711 u32 ptype)
1712 {
1713 struct adapter *adapter = ifp->if_softc;
1714
1715 #ifdef LRO
1716 struct ethercom *ec = &adapter->osdep.ec;
1717
1718 /*
1719  * At the moment LRO is only used for IP/TCP packets whose TCP checksum
1720  * has been computed by hardware and which carry no VLAN tag in the
1721  * Ethernet header. For IPv6 we do not yet support extension headers.
1722 */
1723 if (rxr->lro_enabled &&
1724 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1725 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1726 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1727 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1728 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1729 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1730 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1731 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1732 /*
1733 * Send to the stack if:
1734 ** - LRO not enabled, or
1735 ** - no LRO resources, or
1736 ** - lro enqueue fails
1737 */
1738 if (rxr->lro.lro_cnt != 0)
1739 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1740 return;
1741 }
1742 #endif /* LRO */
1743
1744 if_percpuq_enqueue(adapter->ipq, m);
1745 } /* ixgbe_rx_input */
1746
1747 /************************************************************************
1748 * ixgbe_rx_discard
1749 ************************************************************************/
1750 static __inline void
1751 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1752 {
1753 struct ixgbe_rx_buf *rbuf;
1754
1755 rbuf = &rxr->rx_buffers[i];
1756
1757 /*
1758 * With advanced descriptors the writeback
1759  * clobbers the buffer addrs, so it's easier
1760 * to just free the existing mbufs and take
1761 * the normal refresh path to get new buffers
1762 * and mapping.
1763 */
1764
1765 if (rbuf->fmp != NULL) {/* Partial chain ? */
1766 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1767 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1768 m_freem(rbuf->fmp);
1769 rbuf->fmp = NULL;
1770 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1771 } else if (rbuf->buf) {
1772 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1773 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1774 m_free(rbuf->buf);
1775 rbuf->buf = NULL;
1776 }
1777 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1778
1779 rbuf->flags = 0;
1780
1781 return;
1782 } /* ixgbe_rx_discard */
1783
1784
1785 /************************************************************************
1786 * ixgbe_rxeof
1787 *
1788  * Executes in interrupt context. It replenishes the
1789  * mbufs in the descriptor ring and passes data which has
1790  * been DMA'ed into host memory up to the network stack.
1791 *
1792 * Return TRUE for more work, FALSE for all clean.
1793 ************************************************************************/
1794 bool
1795 ixgbe_rxeof(struct ix_queue *que)
1796 {
1797 struct adapter *adapter = que->adapter;
1798 struct rx_ring *rxr = que->rxr;
1799 struct ifnet *ifp = adapter->ifp;
1800 #ifdef LRO
1801 struct lro_ctrl *lro = &rxr->lro;
1802 #endif /* LRO */
1803 union ixgbe_adv_rx_desc *cur;
1804 struct ixgbe_rx_buf *rbuf, *nbuf;
1805 int i, nextp, processed = 0;
1806 u32 staterr = 0;
1807 u32 count = adapter->rx_process_limit;
1808 #ifdef RSS
1809 u16 pkt_info;
1810 #endif
1811
1812 IXGBE_RX_LOCK(rxr);
1813
1814 #ifdef DEV_NETMAP
1815 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1816 		/* Same as the txeof routine: wake up clients on interrupt. */
1817 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1818 IXGBE_RX_UNLOCK(rxr);
1819 return (FALSE);
1820 }
1821 }
1822 #endif /* DEV_NETMAP */
1823
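	/*
	 * Main receive loop: process completed descriptors until the
	 * rx_process_limit budget ("count") is exhausted or a descriptor
	 * whose DD (descriptor done) bit is not yet set is reached.
	 */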
1824 for (i = rxr->next_to_check; count != 0;) {
1825 struct mbuf *sendmp, *mp;
1826 u32 rsc, ptype;
1827 u16 len;
1828 u16 vtag = 0;
1829 bool eop;
1830
1831 /* Sync the ring. */
1832 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1833 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1834
1835 cur = &rxr->rx_base[i];
1836 staterr = le32toh(cur->wb.upper.status_error);
1837 #ifdef RSS
1838 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1839 #endif
1840
1841 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1842 break;
1843
1844 count--;
1845 sendmp = NULL;
1846 nbuf = NULL;
1847 rsc = 0;
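		/* Clear status_error so a stale DD bit is not seen again. */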
1848 cur->wb.upper.status_error = 0;
1849 rbuf = &rxr->rx_buffers[i];
1850 mp = rbuf->buf;
1851
1852 len = le16toh(cur->wb.upper.length);
1853 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1854 IXGBE_RXDADV_PKTTYPE_MASK;
1855 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1856
1857 /* Make sure bad packets are discarded */
1858 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1859 #if __FreeBSD_version >= 1100036
1860 if (adapter->feat_en & IXGBE_FEATURE_VF)
1861 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1862 #endif
1863 rxr->rx_discarded.ev_count++;
1864 ixgbe_rx_discard(rxr, i);
1865 goto next_desc;
1866 }
1867
1868 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1869 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1870
1871 /*
1872 * On 82599 which supports a hardware
1873 * LRO (called HW RSC), packets need
1874 * not be fragmented across sequential
1875 		 * descriptors; rather, the next descriptor
1876 		 * is indicated in bits of the descriptor.
1877 		 * This also means that we might process
1878 		 * more than one packet at a time, something
1879 		 * that has never been true before; it
1880 * required eliminating global chain pointers
1881 * in favor of what we are doing here. -jfv
1882 */
1883 if (!eop) {
1884 /*
1885 * Figure out the next descriptor
1886 * of this frame.
1887 */
1888 if (rxr->hw_rsc == TRUE) {
1889 rsc = ixgbe_rsc_count(cur);
1890 rxr->rsc_num += (rsc - 1);
1891 }
1892 if (rsc) { /* Get hardware index */
1893 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1894 IXGBE_RXDADV_NEXTP_SHIFT);
1895 } else { /* Just sequential */
1896 nextp = i + 1;
1897 if (nextp == adapter->num_rx_desc)
1898 nextp = 0;
1899 }
1900 nbuf = &rxr->rx_buffers[nextp];
1901 prefetch(nbuf);
1902 }
1903 /*
1904 * Rather than using the fmp/lmp global pointers
1905 * we now keep the head of a packet chain in the
1906 * buffer struct and pass this along from one
1907 * descriptor to the next, until we get EOP.
1908 */
1909 mp->m_len = len;
1910 		/*
1911 		 * See if there is a stored head of a partially
1912 		 * assembled chain that this mbuf belongs to.
1913 		 */
1914 sendmp = rbuf->fmp;
1915 if (sendmp != NULL) { /* secondary frag */
1916 rbuf->buf = rbuf->fmp = NULL;
1917 mp->m_flags &= ~M_PKTHDR;
1918 sendmp->m_pkthdr.len += mp->m_len;
1919 } else {
1920 /*
1921 * Optimize. This might be a small packet,
1922 * maybe just a TCP ACK. Do a fast copy that
1923 * is cache aligned into a new mbuf, and
1924 * leave the old mbuf+cluster for re-use.
1925 */
1926 if (eop && len <= IXGBE_RX_COPY_LEN) {
1927 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1928 if (sendmp != NULL) {
1929 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1930 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1931 len);
1932 sendmp->m_len = len;
1933 rxr->rx_copies.ev_count++;
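					/*
					 * IXGBE_RX_COPY marks the old
					 * mbuf+cluster as still attached so
					 * the refresh path can reuse it.
					 */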
1934 rbuf->flags |= IXGBE_RX_COPY;
1935 }
1936 }
1937 if (sendmp == NULL) {
1938 rbuf->buf = rbuf->fmp = NULL;
1939 sendmp = mp;
1940 }
1941
1942 			/* First descriptor of a non-packet-split chain */
1943 sendmp->m_flags |= M_PKTHDR;
1944 sendmp->m_pkthdr.len = mp->m_len;
1945 }
1946 ++processed;
1947
1948 /* Pass the head pointer on */
1949 if (eop == 0) {
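			/*
			 * Not the last descriptor of the frame: stash the
			 * chain head in the next buffer and link its mbuf
			 * onto the end of the chain.
			 */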
1950 nbuf->fmp = sendmp;
1951 sendmp = NULL;
1952 mp->m_next = nbuf->buf;
1953 } else { /* Sending this frame */
1954 m_set_rcvif(sendmp, ifp);
1955 ++rxr->packets;
1956 rxr->rx_packets.ev_count++;
1957 /* capture data for AIM */
1958 rxr->bytes += sendmp->m_pkthdr.len;
1959 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1960 /* Process vlan info */
1961 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1962 vtag = le16toh(cur->wb.upper.vlan);
1963 if (vtag) {
1964 vlan_set_tag(sendmp, vtag);
1965 }
1966 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1967 ixgbe_rx_checksum(staterr, sendmp, ptype,
1968 &adapter->stats.pf);
1969 }
1970
1971 #if 0 /* FreeBSD */
1972 /*
1973 * In case of multiqueue, we have RXCSUM.PCSD bit set
1974 * and never cleared. This means we have RSS hash
1975 * available to be used.
1976 */
1977 if (adapter->num_queues > 1) {
1978 sendmp->m_pkthdr.flowid =
1979 le32toh(cur->wb.lower.hi_dword.rss);
1980 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1981 case IXGBE_RXDADV_RSSTYPE_IPV4:
1982 M_HASHTYPE_SET(sendmp,
1983 M_HASHTYPE_RSS_IPV4);
1984 break;
1985 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1986 M_HASHTYPE_SET(sendmp,
1987 M_HASHTYPE_RSS_TCP_IPV4);
1988 break;
1989 case IXGBE_RXDADV_RSSTYPE_IPV6:
1990 M_HASHTYPE_SET(sendmp,
1991 M_HASHTYPE_RSS_IPV6);
1992 break;
1993 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1994 M_HASHTYPE_SET(sendmp,
1995 M_HASHTYPE_RSS_TCP_IPV6);
1996 break;
1997 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1998 M_HASHTYPE_SET(sendmp,
1999 M_HASHTYPE_RSS_IPV6_EX);
2000 break;
2001 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
2002 M_HASHTYPE_SET(sendmp,
2003 M_HASHTYPE_RSS_TCP_IPV6_EX);
2004 break;
2005 #if __FreeBSD_version > 1100000
2006 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2007 M_HASHTYPE_SET(sendmp,
2008 M_HASHTYPE_RSS_UDP_IPV4);
2009 break;
2010 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2011 M_HASHTYPE_SET(sendmp,
2012 M_HASHTYPE_RSS_UDP_IPV6);
2013 break;
2014 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2015 M_HASHTYPE_SET(sendmp,
2016 M_HASHTYPE_RSS_UDP_IPV6_EX);
2017 break;
2018 #endif
2019 default:
2020 M_HASHTYPE_SET(sendmp,
2021 M_HASHTYPE_OPAQUE_HASH);
2022 }
2023 } else {
2024 sendmp->m_pkthdr.flowid = que->msix;
2025 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2026 }
2027 #endif
2028 }
2029 next_desc:
2030 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2031 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2032
2033 /* Advance our pointers to the next descriptor. */
2034 if (++i == rxr->num_desc)
2035 i = 0;
2036
2037 /* Now send to the stack or do LRO */
2038 if (sendmp != NULL) {
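			/*
			 * Drop the RX lock while the packet is handed to
			 * the stack; save and reload next_to_check in case
			 * it changed while the lock was released.
			 */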
2039 rxr->next_to_check = i;
2040 IXGBE_RX_UNLOCK(rxr);
2041 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2042 IXGBE_RX_LOCK(rxr);
2043 i = rxr->next_to_check;
2044 }
2045
2046 		/* Refresh mbufs after every 8 processed descriptors */
2047 if (processed == 8) {
2048 ixgbe_refresh_mbufs(rxr, i);
2049 processed = 0;
2050 }
2051 }
2052
2053 /* Refresh any remaining buf structs */
2054 if (ixgbe_rx_unrefreshed(rxr))
2055 ixgbe_refresh_mbufs(rxr, i);
2056
2057 rxr->next_to_check = i;
2058
2059 IXGBE_RX_UNLOCK(rxr);
2060
2061 #ifdef LRO
2062 /*
2063 * Flush any outstanding LRO work
2064 */
2065 tcp_lro_flush_all(lro);
2066 #endif /* LRO */
2067
2068 /*
2069 * Still have cleaning to do?
2070 */
2071 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2072 return (TRUE);
2073
2074 return (FALSE);
2075 } /* ixgbe_rxeof */
2076
2077
2078 /************************************************************************
2079 * ixgbe_rx_checksum
2080 *
2081 * Verify that the hardware indicated that the checksum is valid.
2082  * Inform the stack about the status of the checksum so that the
2083  * stack doesn't spend time verifying it.
2084 ************************************************************************/
2085 static void
2086 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2087 struct ixgbe_hw_stats *stats)
2088 {
2089 u16 status = (u16)staterr;
2090 u8 errors = (u8)(staterr >> 24);
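	/* staterr: status bits in the low 16 bits, error bits in bits 24-31. */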
2091 #if 0
2092 bool sctp = false;
2093
2094 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2095 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2096 sctp = true;
2097 #endif
2098
2099 /* IPv4 checksum */
2100 if (status & IXGBE_RXD_STAT_IPCS) {
2101 stats->ipcs.ev_count++;
2102 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2103 /* IP Checksum Good */
2104 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2105 } else {
2106 stats->ipcs_bad.ev_count++;
2107 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2108 }
2109 }
2110 /* TCP/UDP/SCTP checksum */
2111 	if (status & IXGBE_RXD_STAT_L4CS) {
2112 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2113 		stats->l4cs.ev_count++;
2114 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2115 mp->m_pkthdr.csum_flags |= type;
2116 } else {
2117 stats->l4cs_bad.ev_count++;
2118 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2119 }
2120 }
2121 } /* ixgbe_rx_checksum */
2122
2123 /************************************************************************
2124 * ixgbe_dma_malloc
2125 ************************************************************************/
2126 int
2127 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2128 struct ixgbe_dma_alloc *dma, const int mapflags)
2129 {
2130 device_t dev = adapter->dev;
2131 int r, rsegs;
2132
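	/*
	 * Allocation proceeds in five steps: create a DMA tag, allocate
	 * DMA-safe memory, map it into kernel virtual address space,
	 * create a DMA map, and load the map.  The fail_* labels unwind
	 * these steps in reverse order.
	 */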
2133 r = ixgbe_dma_tag_create(
2134 /* parent */ adapter->osdep.dmat,
2135 /* alignment */ DBA_ALIGN,
2136 /* bounds */ 0,
2137 /* maxsize */ size,
2138 /* nsegments */ 1,
2139 /* maxsegsize */ size,
2140 /* flags */ BUS_DMA_ALLOCNOW,
2141 &dma->dma_tag);
2142 if (r != 0) {
2143 aprint_error_dev(dev,
2144 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2145 r);
2146 goto fail_0;
2147 }
2148
2149 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2150 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2151 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2152 if (r != 0) {
2153 aprint_error_dev(dev,
2154 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2155 goto fail_1;
2156 }
2157
2158 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2159 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2160 if (r != 0) {
2161 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2162 __func__, r);
2163 goto fail_2;
2164 }
2165
2166 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2167 if (r != 0) {
2168 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2169 __func__, r);
2170 goto fail_3;
2171 }
2172
2173 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2174 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2175 if (r != 0) {
2176 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2177 __func__, r);
2178 goto fail_4;
2179 }
2180 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2181 dma->dma_size = size;
2182 return 0;
2183 fail_4:
2184 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2185 fail_3:
2186 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2187 fail_2:
2188 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2189 fail_1:
2190 ixgbe_dma_tag_destroy(dma->dma_tag);
2191 fail_0:
2192
2193 return (r);
2194 } /* ixgbe_dma_malloc */
2195
2196 /************************************************************************
2197 * ixgbe_dma_free
2198 ************************************************************************/
2199 void
2200 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2201 {
2202 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2203 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2204 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2205 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2206 ixgbe_dma_tag_destroy(dma->dma_tag);
2207 } /* ixgbe_dma_free */
2208
2209
2210 /************************************************************************
2211 * ixgbe_allocate_queues
2212 *
2213 * Allocate memory for the transmit and receive rings, and then
2214 * the descriptors associated with each, called only once at attach.
2215 ************************************************************************/
2216 int
2217 ixgbe_allocate_queues(struct adapter *adapter)
2218 {
2219 device_t dev = adapter->dev;
2220 struct ix_queue *que;
2221 struct tx_ring *txr;
2222 struct rx_ring *rxr;
2223 int rsize, tsize, error = IXGBE_SUCCESS;
2224 int txconf = 0, rxconf = 0;
2225
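	/*
	 * Allocation order: top level queue structs, the TX ring array,
	 * the RX ring array, then per-ring descriptor DMA memory and
	 * buffers.  txconf/rxconf count how many rings have been set up
	 * so a midcourse failure can be unwound at the err_* labels.
	 */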
2226 /* First, allocate the top level queue structs */
2227 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2228 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2229 if (adapter->queues == NULL) {
2230 aprint_error_dev(dev, "Unable to allocate queue memory\n");
2231 error = ENOMEM;
2232 goto fail;
2233 }
2234
2235 /* Second, allocate the TX ring struct memory */
2236 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2237 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2238 if (adapter->tx_rings == NULL) {
2239 aprint_error_dev(dev, "Unable to allocate TX ring memory\n");
2240 error = ENOMEM;
2241 goto tx_fail;
2242 }
2243
2244 /* Third, allocate the RX ring */
2245 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2246 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2247 if (adapter->rx_rings == NULL) {
2248 aprint_error_dev(dev, "Unable to allocate RX ring memory\n");
2249 error = ENOMEM;
2250 goto rx_fail;
2251 }
2252
2253 /* For the ring itself */
2254 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2255 DBA_ALIGN);
2256
2257 	/*
2258 	 * Now set up the TX queues. txconf is needed to handle the
2259 	 * possibility that things fail midcourse and we need to
2260 	 * undo the allocated memory gracefully.
2261 	 */
2262 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2263 /* Set up some basics */
2264 txr = &adapter->tx_rings[i];
2265 txr->adapter = adapter;
2266 txr->txr_interq = NULL;
2267 /* In case SR-IOV is enabled, align the index properly */
2268 #ifdef PCI_IOV
2269 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2270 i);
2271 #else
2272 txr->me = i;
2273 #endif
2274 txr->num_desc = adapter->num_tx_desc;
2275
2276 /* Initialize the TX side lock */
2277 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2278
2279 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2280 BUS_DMA_NOWAIT)) {
2281 aprint_error_dev(dev,
2282 "Unable to allocate TX Descriptor memory\n");
2283 error = ENOMEM;
2284 goto err_tx_desc;
2285 }
2286 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2287 bzero((void *)txr->tx_base, tsize);
2288
2289 /* Now allocate transmit buffers for the ring */
2290 if (ixgbe_allocate_transmit_buffers(txr)) {
2291 aprint_error_dev(dev,
2292 "Critical Failure setting up transmit buffers\n");
2293 error = ENOMEM;
2294 goto err_tx_desc;
2295 }
2296 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
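			/*
			 * Multiqueue TX stages packets in a per-ring pcq(9)
			 * producer/consumer queue rather than the legacy
			 * single if_snd queue.
			 */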
2297 /* Allocate a buf ring */
2298 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2299 if (txr->txr_interq == NULL) {
2300 aprint_error_dev(dev,
2301 "Critical Failure setting up buf ring\n");
2302 error = ENOMEM;
2303 goto err_tx_desc;
2304 }
2305 }
2306 }
2307
2308 /*
2309 * Next the RX queues...
2310 */
2311 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2312 DBA_ALIGN);
2313 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2314 rxr = &adapter->rx_rings[i];
2315 /* Set up some basics */
2316 rxr->adapter = adapter;
2317 #ifdef PCI_IOV
2318 /* In case SR-IOV is enabled, align the index properly */
2319 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2320 i);
2321 #else
2322 rxr->me = i;
2323 #endif
2324 rxr->num_desc = adapter->num_rx_desc;
2325
2326 /* Initialize the RX side lock */
2327 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2328
2329 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2330 BUS_DMA_NOWAIT)) {
2331 aprint_error_dev(dev,
2332 			    "Unable to allocate RX Descriptor memory\n");
2333 error = ENOMEM;
2334 goto err_rx_desc;
2335 }
2336 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2337 bzero((void *)rxr->rx_base, rsize);
2338
2339 /* Allocate receive buffers for the ring */
2340 if (ixgbe_allocate_receive_buffers(rxr)) {
2341 aprint_error_dev(dev,
2342 "Critical Failure setting up receive buffers\n");
2343 error = ENOMEM;
2344 goto err_rx_desc;
2345 }
2346 }
2347
2348 /*
2349 * Finally set up the queue holding structs
2350 */
2351 for (int i = 0; i < adapter->num_queues; i++) {
2352 que = &adapter->queues[i];
2353 que->adapter = adapter;
2354 que->me = i;
2355 que->txr = &adapter->tx_rings[i];
2356 que->rxr = &adapter->rx_rings[i];
2357
2358 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2359 que->disabled_count = 0;
2360 }
2361
2362 return (0);
2363
2364 err_rx_desc:
2365 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2366 ixgbe_dma_free(adapter, &rxr->rxdma);
2367 err_tx_desc:
2368 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2369 ixgbe_dma_free(adapter, &txr->txdma);
2370 free(adapter->rx_rings, M_DEVBUF);
2371 rx_fail:
2372 free(adapter->tx_rings, M_DEVBUF);
2373 tx_fail:
2374 free(adapter->queues, M_DEVBUF);
2375 fail:
2376 return (error);
2377 } /* ixgbe_allocate_queues */
2378