1 /* $NetBSD: ix_txrx.c,v 1.60 2020/01/21 14:55:55 msaitoh Exp $ */
2
3 /******************************************************************************
4
5 Copyright (c) 2001-2017, Intel Corporation
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 1. Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17
18 3. Neither the name of the Intel Corporation nor the names of its
19 contributors may be used to endorse or promote products derived from
20 this software without specific prior written permission.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33
34 ******************************************************************************/
35 /*$FreeBSD: head/sys/dev/ixgbe/ix_txrx.c 327031 2017-12-20 18:15:06Z erj $*/
36
37 /*
38 * Copyright (c) 2011 The NetBSD Foundation, Inc.
39 * All rights reserved.
40 *
41 * This code is derived from software contributed to The NetBSD Foundation
42 * by Coyote Point Systems, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
54 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
57 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
58 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
59 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
61 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
62 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
63 * POSSIBILITY OF SUCH DAMAGE.
64 */
65
66 #include "opt_inet.h"
67 #include "opt_inet6.h"
68
69 #include "ixgbe.h"
70
71 /*
72 * HW RSC control:
73  * This feature only works with
74  * IPv4, and only on 82599 and later.
75  * It also causes IP forwarding to
76  * fail and, unlike LRO, it cannot be
77  * controlled by the stack. For all these
78  * reasons it is best left off, with no
79  * tunable interface; enabling it
80  * requires changing this setting and
81  * recompiling.
82 */
83 static bool ixgbe_rsc_enable = FALSE;
84
85 /*
86 * For Flow Director: this is the
87 * number of TX packets we sample
88  * for the filter pool; this means
89 * every 20th packet will be probed.
90 *
91 * This feature can be disabled by
92 * setting this to 0.
93 */
94 static int atr_sample_rate = 20;
95
96 /************************************************************************
97 * Local Function prototypes
98 ************************************************************************/
99 static void ixgbe_setup_transmit_ring(struct tx_ring *);
100 static void ixgbe_free_transmit_buffers(struct tx_ring *);
101 static int ixgbe_setup_receive_ring(struct rx_ring *);
102 static void ixgbe_free_receive_buffers(struct rx_ring *);
103 static void ixgbe_rx_checksum(u32, struct mbuf *, u32,
104 struct ixgbe_hw_stats *);
105 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
106 static void ixgbe_drain(struct ifnet *, struct tx_ring *);
107 static int ixgbe_xmit(struct tx_ring *, struct mbuf *);
108 static int ixgbe_tx_ctx_setup(struct tx_ring *,
109 struct mbuf *, u32 *, u32 *);
110 static int ixgbe_tso_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
113 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
114 struct mbuf *, u32);
115 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
116 struct ixgbe_dma_alloc *, int);
117 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
118
119 static void ixgbe_setup_hw_rsc(struct rx_ring *);
120
121 /************************************************************************
122 * ixgbe_legacy_start_locked - Transmit entry point
123 *
124 * Called by the stack to initiate a transmit.
125 * The driver will remain in this routine as long as there are
126 * packets to transmit and transmit resources are available.
127 * In case resources are not available, the stack is notified
128 * and the packet is requeued.
129 ************************************************************************/
130 int
131 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
132 {
133 int rc;
134 struct mbuf *m_head;
135 struct adapter *adapter = txr->adapter;
136
137 IXGBE_TX_LOCK_ASSERT(txr);
138
139 if (adapter->link_active != LINK_STATE_UP) {
140 /*
141 * discard all packets buffered in IFQ to avoid
142 * sending old packets at next link up timing.
143 */
144 ixgbe_drain(ifp, txr);
145 return (ENETDOWN);
146 }
147 if ((ifp->if_flags & IFF_RUNNING) == 0)
148 return (ENETDOWN);
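	/*
	 * txr_no_space is set in ixgbe_xmit() when the ring fills up
	 * and cleared in ixgbe_txeof() once descriptors are reclaimed.
	 */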
149 if (txr->txr_no_space)
150 return (ENETDOWN);
151
152 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
153 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
154 break;
155
156 IFQ_POLL(&ifp->if_snd, m_head);
157 if (m_head == NULL)
158 break;
159
160 if ((rc = ixgbe_xmit(txr, m_head)) == EAGAIN) {
161 break;
162 }
163 IFQ_DEQUEUE(&ifp->if_snd, m_head);
164 if (rc != 0) {
165 m_freem(m_head);
166 continue;
167 }
168
169 /* Send a copy of the frame to the BPF listener */
170 bpf_mtap(ifp, m_head, BPF_D_OUT);
171 }
172
173 return IXGBE_SUCCESS;
174 } /* ixgbe_legacy_start_locked */
175
176 /************************************************************************
177 * ixgbe_legacy_start
178 *
179 * Called by the stack, this always uses the first tx ring,
180 * and should not be used with multiqueue tx enabled.
181 ************************************************************************/
182 void
183 ixgbe_legacy_start(struct ifnet *ifp)
184 {
185 struct adapter *adapter = ifp->if_softc;
186 struct tx_ring *txr = adapter->tx_rings;
187
188 if (ifp->if_flags & IFF_RUNNING) {
189 IXGBE_TX_LOCK(txr);
190 ixgbe_legacy_start_locked(ifp, txr);
191 IXGBE_TX_UNLOCK(txr);
192 }
193 } /* ixgbe_legacy_start */
194
195 /************************************************************************
196 * ixgbe_mq_start - Multiqueue Transmit Entry Point
197 *
198 * (if_transmit function)
199 ************************************************************************/
200 int
201 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
202 {
203 struct adapter *adapter = ifp->if_softc;
204 struct tx_ring *txr;
205 int i;
206 #ifdef RSS
207 uint32_t bucket_id;
208 #endif
209
210 /*
211 * When doing RSS, map it to the same outbound queue
212 * as the incoming flow would be mapped to.
213 *
214  * If everything is set up correctly, it should be the
215  * same bucket as the one this CPU maps to.
216 */
217 #ifdef RSS
218 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
219 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
220 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
221 &bucket_id) == 0)) {
222 i = bucket_id % adapter->num_queues;
223 #ifdef IXGBE_DEBUG
224 if (bucket_id > adapter->num_queues)
225 if_printf(ifp,
226 "bucket_id (%d) > num_queues (%d)\n",
227 bucket_id, adapter->num_queues);
228 #endif
229 } else
230 i = m->m_pkthdr.flowid % adapter->num_queues;
231 } else
232 #endif /* RSS */
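	/* Without RSS, spread transmit flows across rings by CPU index. */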
233 i = (cpu_index(curcpu()) % ncpu) % adapter->num_queues;
234
235 /* Check for a hung queue and pick alternative */
236 if (((1ULL << i) & adapter->active_queues) == 0)
237 i = ffs64(adapter->active_queues);
238
239 txr = &adapter->tx_rings[i];
240
241 if (__predict_false(!pcq_put(txr->txr_interq, m))) {
242 m_freem(m);
243 txr->pcq_drops.ev_count++;
244 return ENOBUFS;
245 }
246 if (IXGBE_TX_TRYLOCK(txr)) {
247 ixgbe_mq_start_locked(ifp, txr);
248 IXGBE_TX_UNLOCK(txr);
249 } else {
250 if (adapter->txrx_use_workqueue) {
251 u_int *enqueued;
252
253 /*
254  * This function itself is not called in interrupt
255  * context, but it can be called in fast softint
256  * context right after receiving forwarded packets.
257  * The workqueue must therefore be protected against
258  * double enqueueing when the machine handles both
259  * locally generated and forwarded packets.
260 */
261 enqueued = percpu_getref(adapter->txr_wq_enqueued);
262 if (*enqueued == 0) {
263 *enqueued = 1;
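				/*
				 * Cleared again in ixgbe_deferred_mq_start_work()
				 * after the queued work has run.
				 */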
264 percpu_putref(adapter->txr_wq_enqueued);
265 workqueue_enqueue(adapter->txr_wq,
266 &txr->wq_cookie, curcpu());
267 } else
268 percpu_putref(adapter->txr_wq_enqueued);
269 } else {
270 kpreempt_disable();
271 softint_schedule(txr->txr_si);
272 kpreempt_enable();
273 }
274 }
275
276 return (0);
277 } /* ixgbe_mq_start */
278
279 /************************************************************************
280 * ixgbe_mq_start_locked
281 ************************************************************************/
282 int
283 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
284 {
285 struct mbuf *next;
286 int enqueued = 0, err = 0;
287
288 if (txr->adapter->link_active != LINK_STATE_UP) {
289 /*
290 * discard all packets buffered in txr_interq to avoid
291 * sending old packets at next link up timing.
292 */
293 ixgbe_drain(ifp, txr);
294 return (ENETDOWN);
295 }
296 if ((ifp->if_flags & IFF_RUNNING) == 0)
297 return (ENETDOWN);
298 if (txr->txr_no_space)
299 return (ENETDOWN);
300
301 /* Process the queue */
302 while ((next = pcq_get(txr->txr_interq)) != NULL) {
303 if ((err = ixgbe_xmit(txr, next)) != 0) {
304 m_freem(next);
305 /* All errors are counted in ixgbe_xmit() */
306 break;
307 }
308 enqueued++;
309 #if __FreeBSD_version >= 1100036
310 /*
311 * Since we're looking at the tx ring, we can check
312  * to see if we're a VF by examining our tail register
313 * address.
314 */
315 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
316 (next->m_flags & M_MCAST))
317 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
318 #endif
319 /* Send a copy of the frame to the BPF listener */
320 bpf_mtap(ifp, next, BPF_D_OUT);
321 if ((ifp->if_flags & IFF_RUNNING) == 0)
322 break;
323 }
324
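	/*
	 * Reclaim completed descriptors when the ring is running low;
	 * this also clears txr_no_space once space is freed.
	 */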
325 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
326 ixgbe_txeof(txr);
327
328 return (err);
329 } /* ixgbe_mq_start_locked */
330
331 /************************************************************************
332 * ixgbe_deferred_mq_start
333 *
334 * Called from a softint and workqueue (indirectly) to drain queued
335 * transmit packets.
336 ************************************************************************/
337 void
338 ixgbe_deferred_mq_start(void *arg)
339 {
340 struct tx_ring *txr = arg;
341 struct adapter *adapter = txr->adapter;
342 struct ifnet *ifp = adapter->ifp;
343
344 IXGBE_TX_LOCK(txr);
345 if (pcq_peek(txr->txr_interq) != NULL)
346 ixgbe_mq_start_locked(ifp, txr);
347 IXGBE_TX_UNLOCK(txr);
348 } /* ixgbe_deferred_mq_start */
349
350 /************************************************************************
351 * ixgbe_deferred_mq_start_work
352 *
353 * Called from a workqueue to drain queued transmit packets.
354 ************************************************************************/
355 void
356 ixgbe_deferred_mq_start_work(struct work *wk, void *arg)
357 {
358 struct tx_ring *txr = container_of(wk, struct tx_ring, wq_cookie);
359 struct adapter *adapter = txr->adapter;
360 u_int *enqueued = percpu_getref(adapter->txr_wq_enqueued);
361 *enqueued = 0;
362 percpu_putref(adapter->txr_wq_enqueued);
363
364 ixgbe_deferred_mq_start(txr);
365 } /* ixgbe_deferred_mq_start_work */
366
367 /************************************************************************
368 * ixgbe_drain_all
369 ************************************************************************/
370 void
371 ixgbe_drain_all(struct adapter *adapter)
372 {
373 struct ifnet *ifp = adapter->ifp;
374 struct ix_queue *que = adapter->queues;
375
376 for (int i = 0; i < adapter->num_queues; i++, que++) {
377 struct tx_ring *txr = que->txr;
378
379 IXGBE_TX_LOCK(txr);
380 ixgbe_drain(ifp, txr);
381 IXGBE_TX_UNLOCK(txr);
382 }
383 }
384
385 /************************************************************************
386 * ixgbe_xmit
387 *
388 * Maps the mbufs to tx descriptors, allowing the
389 * TX engine to transmit the packets.
390 *
391 * Return 0 on success, positive on failure
392 ************************************************************************/
393 static int
394 ixgbe_xmit(struct tx_ring *txr, struct mbuf *m_head)
395 {
396 struct adapter *adapter = txr->adapter;
397 struct ixgbe_tx_buf *txbuf;
398 union ixgbe_adv_tx_desc *txd = NULL;
399 struct ifnet *ifp = adapter->ifp;
400 int i, j, error;
401 int first;
402 u32 olinfo_status = 0, cmd_type_len;
403 bool remap = TRUE;
404 bus_dmamap_t map;
405
406 /* Basic descriptor defines */
407 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
408 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
409
410 if (vlan_has_tag(m_head))
411 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
412
413 /*
414  * It is important to capture the first descriptor
415  * used, because it will record the descriptor we
416  * tell the hardware to report back on (the EOP).
417 */
418 first = txr->next_avail_desc;
419 txbuf = &txr->tx_buffers[first];
420 map = txbuf->map;
421
422 /*
423 * Map the packet for DMA.
424 */
425 retry:
426 error = bus_dmamap_load_mbuf(txr->txtag->dt_dmat, map, m_head,
427 BUS_DMA_NOWAIT);
428
429 if (__predict_false(error)) {
430 struct mbuf *m;
431
432 switch (error) {
433 case EAGAIN:
434 txr->q_eagain_tx_dma_setup++;
435 return EAGAIN;
436 case ENOMEM:
437 txr->q_enomem_tx_dma_setup++;
438 return EAGAIN;
439 case EFBIG:
440 /* Try it again? - one try */
441 if (remap == TRUE) {
442 remap = FALSE;
443 /*
444 * XXX: m_defrag will choke on
445 * non-MCLBYTES-sized clusters
446 */
447 txr->q_efbig_tx_dma_setup++;
448 m = m_defrag(m_head, M_NOWAIT);
449 if (m == NULL) {
450 txr->q_mbuf_defrag_failed++;
451 return ENOBUFS;
452 }
453 m_head = m;
454 goto retry;
455 } else {
456 txr->q_efbig2_tx_dma_setup++;
457 return error;
458 }
459 case EINVAL:
460 txr->q_einval_tx_dma_setup++;
461 return error;
462 default:
463 txr->q_other_tx_dma_setup++;
464 return error;
465 }
466 }
467
468 /* Make certain there are enough descriptors */
469 if (txr->tx_avail < (map->dm_nsegs + 2)) {
470 txr->txr_no_space = true;
471 txr->no_desc_avail.ev_count++;
472 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
473 return EAGAIN;
474 }
475
476 /*
477 * Set up the appropriate offload context
478 * this will consume the first descriptor
479 */
480 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
481 if (__predict_false(error)) {
482 return (error);
483 }
484
485 /* Do the flow director magic */
486 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
487 (txr->atr_sample) && (!adapter->fdir_reinit)) {
488 ++txr->atr_count;
489 if (txr->atr_count >= atr_sample_rate) {
490 ixgbe_atr(txr, m_head);
491 txr->atr_count = 0;
492 }
493 }
494
495 olinfo_status |= IXGBE_ADVTXD_CC;
496 i = txr->next_avail_desc;
497 for (j = 0; j < map->dm_nsegs; j++) {
498 bus_size_t seglen;
499 bus_addr_t segaddr;
500
501 txbuf = &txr->tx_buffers[i];
502 txd = &txr->tx_base[i];
503 seglen = map->dm_segs[j].ds_len;
504 segaddr = htole64(map->dm_segs[j].ds_addr);
505
506 txd->read.buffer_addr = segaddr;
507 txd->read.cmd_type_len = htole32(cmd_type_len | seglen);
508 txd->read.olinfo_status = htole32(olinfo_status);
509
510 if (++i == txr->num_desc)
511 i = 0;
512 }
513
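	/*
	 * Mark the last data descriptor with EOP (end of packet) and RS
	 * (report status) so the hardware writes back completion for it.
	 */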
514 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
515 txr->tx_avail -= map->dm_nsegs;
516 txr->next_avail_desc = i;
517
518 txbuf->m_head = m_head;
519 /*
520 * Here we swap the map so the last descriptor,
521  * which gets the completion interrupt, has the
522 * real map, and the first descriptor gets the
523 * unused map from this descriptor.
524 */
525 txr->tx_buffers[first].map = txbuf->map;
526 txbuf->map = map;
527 bus_dmamap_sync(txr->txtag->dt_dmat, map, 0, m_head->m_pkthdr.len,
528 BUS_DMASYNC_PREWRITE);
529
530 /* Set the EOP descriptor that will be marked done */
531 txbuf = &txr->tx_buffers[first];
532 txbuf->eop = txd;
533
534 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
535 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
536 /*
537  * Advance the Transmit Descriptor Tail (TDT); this tells the
538 * hardware that this frame is available to transmit.
539 */
540 ++txr->total_packets.ev_count;
541 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
542
543 /*
544 * XXXX NOMPSAFE: ifp->if_data should be percpu.
545 */
546 ifp->if_obytes += m_head->m_pkthdr.len;
547 if (m_head->m_flags & M_MCAST)
548 ifp->if_omcasts++;
549
550 /* Mark queue as having work */
551 if (txr->busy == 0)
552 txr->busy = 1;
553
554 return (0);
555 } /* ixgbe_xmit */
556
557 /************************************************************************
558 * ixgbe_drain
559 ************************************************************************/
560 static void
561 ixgbe_drain(struct ifnet *ifp, struct tx_ring *txr)
562 {
563 struct mbuf *m;
564
565 IXGBE_TX_LOCK_ASSERT(txr);
566
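	/* Only ring 0 services the legacy if_snd queue, so drain it there. */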
567 if (txr->me == 0) {
568 while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
569 IFQ_DEQUEUE(&ifp->if_snd, m);
570 m_freem(m);
571 IF_DROP(&ifp->if_snd);
572 }
573 }
574
575 while ((m = pcq_get(txr->txr_interq)) != NULL) {
576 m_freem(m);
577 txr->pcq_drops.ev_count++;
578 }
579 }
580
581 /************************************************************************
582 * ixgbe_allocate_transmit_buffers
583 *
584 * Allocate memory for tx_buffer structures. The tx_buffer stores all
585 * the information needed to transmit a packet on the wire. This is
586  * called only once at attach; setup is done at every reset.
587 ************************************************************************/
588 static int
589 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
590 {
591 struct adapter *adapter = txr->adapter;
592 device_t dev = adapter->dev;
593 struct ixgbe_tx_buf *txbuf;
594 int error, i;
595
596 /*
597 * Setup DMA descriptor areas.
598 */
599 error = ixgbe_dma_tag_create(
600 /* parent */ adapter->osdep.dmat,
601 /* alignment */ 1,
602 /* bounds */ 0,
603 /* maxsize */ IXGBE_TSO_SIZE,
604 /* nsegments */ adapter->num_segs,
605 /* maxsegsize */ PAGE_SIZE,
606 /* flags */ 0,
607 &txr->txtag);
608 if (error != 0) {
609 aprint_error_dev(dev,"Unable to allocate TX DMA tag\n");
610 goto fail;
611 }
612
613 txr->tx_buffers = malloc(sizeof(struct ixgbe_tx_buf) *
614 adapter->num_tx_desc, M_DEVBUF, M_WAITOK | M_ZERO);
615
616 /* Create the descriptor buffer dma maps */
617 txbuf = txr->tx_buffers;
618 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
619 error = ixgbe_dmamap_create(txr->txtag, 0, &txbuf->map);
620 if (error != 0) {
621 aprint_error_dev(dev,
622 "Unable to create TX DMA map (%d)\n", error);
623 goto fail;
624 }
625 }
626
627 return 0;
628 fail:
629 	/* Free it all; this handles the case where we stopped partway through */
630 #if 0 /* XXX was FreeBSD */
631 ixgbe_free_transmit_structures(adapter);
632 #else
633 ixgbe_free_transmit_buffers(txr);
634 #endif
635 return (error);
636 } /* ixgbe_allocate_transmit_buffers */
637
638 /************************************************************************
639 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
640 ************************************************************************/
641 static void
642 ixgbe_setup_transmit_ring(struct tx_ring *txr)
643 {
644 struct adapter *adapter = txr->adapter;
645 struct ixgbe_tx_buf *txbuf;
646 #ifdef DEV_NETMAP
647 struct netmap_adapter *na = NA(adapter->ifp);
648 struct netmap_slot *slot;
649 #endif /* DEV_NETMAP */
650
651 /* Clear the old ring contents */
652 IXGBE_TX_LOCK(txr);
653
654 #ifdef DEV_NETMAP
655 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
656 /*
657 * (under lock): if in netmap mode, do some consistency
658 * checks and set slot to entry 0 of the netmap ring.
659 */
660 slot = netmap_reset(na, NR_TX, txr->me, 0);
661 }
662 #endif /* DEV_NETMAP */
663
664 bzero((void *)txr->tx_base,
665 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
666 /* Reset indices */
667 txr->next_avail_desc = 0;
668 txr->next_to_clean = 0;
669
670 /* Free any existing tx buffers. */
671 txbuf = txr->tx_buffers;
672 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
673 if (txbuf->m_head != NULL) {
674 bus_dmamap_sync(txr->txtag->dt_dmat, txbuf->map,
675 0, txbuf->m_head->m_pkthdr.len,
676 BUS_DMASYNC_POSTWRITE);
677 ixgbe_dmamap_unload(txr->txtag, txbuf->map);
678 m_freem(txbuf->m_head);
679 txbuf->m_head = NULL;
680 }
681
682 #ifdef DEV_NETMAP
683 /*
684 * In netmap mode, set the map for the packet buffer.
685 * NOTE: Some drivers (not this one) also need to set
686 * the physical buffer address in the NIC ring.
687 * Slots in the netmap ring (indexed by "si") are
688 * kring->nkr_hwofs positions "ahead" wrt the
689 * corresponding slot in the NIC ring. In some drivers
690 * (not here) nkr_hwofs can be negative. Function
691 * netmap_idx_n2k() handles wraparounds properly.
692 */
693 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
694 int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
695 netmap_load_map(na, txr->txtag,
696 txbuf->map, NMB(na, slot + si));
697 }
698 #endif /* DEV_NETMAP */
699
700 /* Clear the EOP descriptor pointer */
701 txbuf->eop = NULL;
702 }
703
704 /* Set the rate at which we sample packets */
705 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
706 txr->atr_sample = atr_sample_rate;
707
708 /* Set number of descriptors available */
709 txr->tx_avail = adapter->num_tx_desc;
710
711 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
712 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
713 IXGBE_TX_UNLOCK(txr);
714 } /* ixgbe_setup_transmit_ring */
715
716 /************************************************************************
717 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
718 ************************************************************************/
719 int
720 ixgbe_setup_transmit_structures(struct adapter *adapter)
721 {
722 struct tx_ring *txr = adapter->tx_rings;
723
724 for (int i = 0; i < adapter->num_queues; i++, txr++)
725 ixgbe_setup_transmit_ring(txr);
726
727 return (0);
728 } /* ixgbe_setup_transmit_structures */
729
730 /************************************************************************
731 * ixgbe_free_transmit_structures - Free all transmit rings.
732 ************************************************************************/
733 void
734 ixgbe_free_transmit_structures(struct adapter *adapter)
735 {
736 struct tx_ring *txr = adapter->tx_rings;
737
738 for (int i = 0; i < adapter->num_queues; i++, txr++) {
739 ixgbe_free_transmit_buffers(txr);
740 ixgbe_dma_free(adapter, &txr->txdma);
741 IXGBE_TX_LOCK_DESTROY(txr);
742 }
743 free(adapter->tx_rings, M_DEVBUF);
744 } /* ixgbe_free_transmit_structures */
745
746 /************************************************************************
747 * ixgbe_free_transmit_buffers
748 *
749 * Free transmit ring related data structures.
750 ************************************************************************/
751 static void
752 ixgbe_free_transmit_buffers(struct tx_ring *txr)
753 {
754 struct adapter *adapter = txr->adapter;
755 struct ixgbe_tx_buf *tx_buffer;
756 int i;
757
758 INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
759
760 if (txr->tx_buffers == NULL)
761 return;
762
763 tx_buffer = txr->tx_buffers;
764 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
765 if (tx_buffer->m_head != NULL) {
766 bus_dmamap_sync(txr->txtag->dt_dmat, tx_buffer->map,
767 0, tx_buffer->m_head->m_pkthdr.len,
768 BUS_DMASYNC_POSTWRITE);
769 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
770 m_freem(tx_buffer->m_head);
771 tx_buffer->m_head = NULL;
772 if (tx_buffer->map != NULL) {
773 ixgbe_dmamap_destroy(txr->txtag,
774 tx_buffer->map);
775 tx_buffer->map = NULL;
776 }
777 } else if (tx_buffer->map != NULL) {
778 ixgbe_dmamap_unload(txr->txtag, tx_buffer->map);
779 ixgbe_dmamap_destroy(txr->txtag, tx_buffer->map);
780 tx_buffer->map = NULL;
781 }
782 }
783 if (txr->txr_interq != NULL) {
784 struct mbuf *m;
785
786 while ((m = pcq_get(txr->txr_interq)) != NULL)
787 m_freem(m);
788 pcq_destroy(txr->txr_interq);
789 }
790 if (txr->tx_buffers != NULL) {
791 free(txr->tx_buffers, M_DEVBUF);
792 txr->tx_buffers = NULL;
793 }
794 if (txr->txtag != NULL) {
795 ixgbe_dma_tag_destroy(txr->txtag);
796 txr->txtag = NULL;
797 }
798 } /* ixgbe_free_transmit_buffers */
799
800 /************************************************************************
801 * ixgbe_tx_ctx_setup
802 *
803 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
804 ************************************************************************/
805 static int
806 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
807 u32 *cmd_type_len, u32 *olinfo_status)
808 {
809 struct adapter *adapter = txr->adapter;
810 struct ixgbe_adv_tx_context_desc *TXD;
811 struct ether_vlan_header *eh;
812 #ifdef INET
813 struct ip *ip;
814 #endif
815 #ifdef INET6
816 struct ip6_hdr *ip6;
817 #endif
818 int ehdrlen, ip_hlen = 0;
819 int offload = TRUE;
820 int ctxd = txr->next_avail_desc;
821 u32 vlan_macip_lens = 0;
822 u32 type_tucmd_mlhl = 0;
823 u16 vtag = 0;
824 u16 etype;
825 u8 ipproto = 0;
826 char *l3d;
827
828
829 /* First check if TSO is to be used */
830 if (mp->m_pkthdr.csum_flags & (M_CSUM_TSOv4 | M_CSUM_TSOv6)) {
831 int rv = ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status);
832
833 if (rv != 0)
834 ++adapter->tso_err.ev_count;
835 return rv;
836 }
837
838 if ((mp->m_pkthdr.csum_flags & M_CSUM_OFFLOAD) == 0)
839 offload = FALSE;
840
841 /* Indicate the whole packet as payload when not doing TSO */
842 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
843
844 /* Now ready a context descriptor */
845 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
846
847 /*
848 * In advanced descriptors the vlan tag must
849 * be placed into the context descriptor. Hence
850 * we need to make one even if not doing offloads.
851 */
852 if (vlan_has_tag(mp)) {
853 vtag = htole16(vlan_get_tag(mp));
854 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
855 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
856 (offload == FALSE))
857 return (0);
858
859 /*
860 * Determine where frame payload starts.
861 * Jump over vlan headers if already present,
862 * helpful for QinQ too.
863 */
864 KASSERT(mp->m_len >= offsetof(struct ether_vlan_header, evl_tag));
865 eh = mtod(mp, struct ether_vlan_header *);
866 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
867 KASSERT(mp->m_len >= sizeof(struct ether_vlan_header));
868 etype = ntohs(eh->evl_proto);
869 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
870 } else {
871 etype = ntohs(eh->evl_encap_proto);
872 ehdrlen = ETHER_HDR_LEN;
873 }
874
875 /* Set the ether header length */
876 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
877
878 if (offload == FALSE)
879 goto no_offloads;
880
881 /*
882 * If the first mbuf only includes the ethernet header,
883 * jump to the next one
884 * XXX: This assumes the stack splits mbufs containing headers
885 * on header boundaries
886 * XXX: And assumes the entire IP header is contained in one mbuf
887 */
888 if (mp->m_len == ehdrlen && mp->m_next)
889 l3d = mtod(mp->m_next, char *);
890 else
891 l3d = mtod(mp, char *) + ehdrlen;
892
893 switch (etype) {
894 #ifdef INET
895 case ETHERTYPE_IP:
896 ip = (struct ip *)(l3d);
897 ip_hlen = ip->ip_hl << 2;
898 ipproto = ip->ip_p;
899 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
900 KASSERT((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) == 0 ||
901 ip->ip_sum == 0);
902 break;
903 #endif
904 #ifdef INET6
905 case ETHERTYPE_IPV6:
906 ip6 = (struct ip6_hdr *)(l3d);
907 ip_hlen = sizeof(struct ip6_hdr);
908 ipproto = ip6->ip6_nxt;
909 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
910 break;
911 #endif
912 default:
913 offload = false;
914 break;
915 }
916
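	/* Request IPv4 header checksum insertion if the stack asked for it. */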
917 if ((mp->m_pkthdr.csum_flags & M_CSUM_IPv4) != 0)
918 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
919
920 vlan_macip_lens |= ip_hlen;
921
922 /* No support for offloads for non-L4 next headers */
923 switch (ipproto) {
924 case IPPROTO_TCP:
925 if (mp->m_pkthdr.csum_flags &
926 (M_CSUM_TCPv4 | M_CSUM_TCPv6))
927 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
928 else
929 offload = false;
930 break;
931 case IPPROTO_UDP:
932 if (mp->m_pkthdr.csum_flags &
933 (M_CSUM_UDPv4 | M_CSUM_UDPv6))
934 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
935 else
936 offload = false;
937 break;
938 default:
939 offload = false;
940 break;
941 }
942
943 if (offload) /* Insert L4 checksum into data descriptors */
944 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
945
946 no_offloads:
947 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
948
949 /* Now copy bits into descriptor */
950 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
951 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
952 TXD->seqnum_seed = htole32(0);
953 TXD->mss_l4len_idx = htole32(0);
954
955 /* We've consumed the first desc, adjust counters */
956 if (++ctxd == txr->num_desc)
957 ctxd = 0;
958 txr->next_avail_desc = ctxd;
959 --txr->tx_avail;
960
961 return (0);
962 } /* ixgbe_tx_ctx_setup */
963
964 /************************************************************************
965 * ixgbe_tso_setup
966 *
967 * Setup work for hardware segmentation offload (TSO) on
968 * adapters using advanced tx descriptors
969 ************************************************************************/
970 static int
971 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
972 u32 *olinfo_status)
973 {
974 struct ixgbe_adv_tx_context_desc *TXD;
975 struct ether_vlan_header *eh;
976 #ifdef INET6
977 struct ip6_hdr *ip6;
978 #endif
979 #ifdef INET
980 struct ip *ip;
981 #endif
982 struct tcphdr *th;
983 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
984 u32 vlan_macip_lens = 0;
985 u32 type_tucmd_mlhl = 0;
986 u32 mss_l4len_idx = 0, paylen;
987 u16 vtag = 0, eh_type;
988
989 /*
990 * Determine where frame payload starts.
991 * Jump over vlan headers if already present
992 */
993 eh = mtod(mp, struct ether_vlan_header *);
994 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
995 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
996 eh_type = eh->evl_proto;
997 } else {
998 ehdrlen = ETHER_HDR_LEN;
999 eh_type = eh->evl_encap_proto;
1000 }
1001
1002 switch (ntohs(eh_type)) {
1003 #ifdef INET
1004 case ETHERTYPE_IP:
1005 ip = (struct ip *)(mp->m_data + ehdrlen);
1006 if (ip->ip_p != IPPROTO_TCP)
1007 return (ENXIO);
1008 ip->ip_sum = 0;
1009 ip_hlen = ip->ip_hl << 2;
1010 th = (struct tcphdr *)((char *)ip + ip_hlen);
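		/*
		 * Seed the TCP checksum with the pseudo-header sum; the
		 * hardware computes the rest over each segment during TSO.
		 */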
1011 th->th_sum = in_cksum_phdr(ip->ip_src.s_addr,
1012 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1013 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
1014 /* Tell transmit desc to also do IPv4 checksum. */
1015 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
1016 break;
1017 #endif
1018 #ifdef INET6
1019 case ETHERTYPE_IPV6:
1020 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1021 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
1022 if (ip6->ip6_nxt != IPPROTO_TCP)
1023 return (ENXIO);
1024 ip_hlen = sizeof(struct ip6_hdr);
1025 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
1026 th = (struct tcphdr *)((char *)ip6 + ip_hlen);
1027 th->th_sum = in6_cksum_phdr(&ip6->ip6_src,
1028 &ip6->ip6_dst, 0, htonl(IPPROTO_TCP));
1029 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
1030 break;
1031 #endif
1032 default:
1033 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
1034 __func__, ntohs(eh_type));
1035 break;
1036 }
1037
1038 ctxd = txr->next_avail_desc;
1039 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
1040
1041 tcp_hlen = th->th_off << 2;
1042
1043 /* This is used in the transmit desc in encap */
1044 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
1045
1046 /* VLAN MACLEN IPLEN */
1047 if (vlan_has_tag(mp)) {
1048 vtag = htole16(vlan_get_tag(mp));
1049 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
1050 }
1051
1052 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
1053 vlan_macip_lens |= ip_hlen;
1054 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
1055
1056 /* ADV DTYPE TUCMD */
1057 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
1058 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
1059 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
1060
1061 /* MSS L4LEN IDX */
1062 mss_l4len_idx |= (mp->m_pkthdr.segsz << IXGBE_ADVTXD_MSS_SHIFT);
1063 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
1064 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
1065
1066 TXD->seqnum_seed = htole32(0);
1067
1068 if (++ctxd == txr->num_desc)
1069 ctxd = 0;
1070
1071 txr->tx_avail--;
1072 txr->next_avail_desc = ctxd;
1073 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1074 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1075 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1076 ++txr->tso_tx.ev_count;
1077
1078 return (0);
1079 } /* ixgbe_tso_setup */
1080
1081
1082 /************************************************************************
1083 * ixgbe_txeof
1084 *
1085 * Examine each tx_buffer in the used queue. If the hardware is done
1086 * processing the packet then free associated resources. The
1087 * tx_buffer is put back on the free queue.
1088 ************************************************************************/
1089 bool
1090 ixgbe_txeof(struct tx_ring *txr)
1091 {
1092 struct adapter *adapter = txr->adapter;
1093 struct ifnet *ifp = adapter->ifp;
1094 struct ixgbe_tx_buf *buf;
1095 union ixgbe_adv_tx_desc *txd;
1096 u32 work, processed = 0;
1097 u32 limit = adapter->tx_process_limit;
1098
1099 KASSERT(mutex_owned(&txr->tx_mtx));
1100
1101 #ifdef DEV_NETMAP
1102 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
1103 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
1104 struct netmap_adapter *na = NA(adapter->ifp);
1105 struct netmap_kring *kring = na->tx_rings[txr->me];
1106 txd = txr->tx_base;
1107 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1108 BUS_DMASYNC_POSTREAD);
1109 /*
1110 * In netmap mode, all the work is done in the context
1111 * of the client thread. Interrupt handlers only wake up
1112 * clients, which may be sleeping on individual rings
1113 * or on a global resource for all rings.
1114 * To implement tx interrupt mitigation, we wake up the client
1115 * thread roughly every half ring, even if the NIC interrupts
1116 * more frequently. This is implemented as follows:
1117 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1118 * the slot that should wake up the thread (nkr_num_slots
1119 * means the user thread should not be woken up);
1120 * - the driver ignores tx interrupts unless netmap_mitigate=0
1121 * or the slot has the DD bit set.
1122 */
1123 if (kring->nr_kflags < kring->nkr_num_slots &&
1124 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD) {
1125 netmap_tx_irq(ifp, txr->me);
1126 }
1127 return false;
1128 }
1129 #endif /* DEV_NETMAP */
1130
1131 if (txr->tx_avail == txr->num_desc) {
1132 txr->busy = 0;
1133 return false;
1134 }
1135
1136 /* Get work starting point */
1137 work = txr->next_to_clean;
1138 buf = &txr->tx_buffers[work];
1139 txd = &txr->tx_base[work];
1140 work -= txr->num_desc; /* The distance to ring end */
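	/*
	 * 'work' is kept as a negative offset from the end of the ring,
	 * so it becomes zero exactly when the index wraps (see the
	 * "!work" tests below).
	 */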
1141 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1142 BUS_DMASYNC_POSTREAD);
1143
1144 do {
1145 union ixgbe_adv_tx_desc *eop = buf->eop;
1146 if (eop == NULL) /* No work */
1147 break;
1148
1149 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1150 break; /* I/O not complete */
1151
1152 if (buf->m_head) {
1153 txr->bytes += buf->m_head->m_pkthdr.len;
1154 bus_dmamap_sync(txr->txtag->dt_dmat, buf->map,
1155 0, buf->m_head->m_pkthdr.len,
1156 BUS_DMASYNC_POSTWRITE);
1157 ixgbe_dmamap_unload(txr->txtag, buf->map);
1158 m_freem(buf->m_head);
1159 buf->m_head = NULL;
1160 }
1161 buf->eop = NULL;
1162 txr->txr_no_space = false;
1163 ++txr->tx_avail;
1164
1165 /* We clean the range if multi segment */
1166 while (txd != eop) {
1167 ++txd;
1168 ++buf;
1169 ++work;
1170 /* wrap the ring? */
1171 if (__predict_false(!work)) {
1172 work -= txr->num_desc;
1173 buf = txr->tx_buffers;
1174 txd = txr->tx_base;
1175 }
1176 if (buf->m_head) {
1177 txr->bytes +=
1178 buf->m_head->m_pkthdr.len;
1179 bus_dmamap_sync(txr->txtag->dt_dmat,
1180 buf->map,
1181 0, buf->m_head->m_pkthdr.len,
1182 BUS_DMASYNC_POSTWRITE);
1183 ixgbe_dmamap_unload(txr->txtag,
1184 buf->map);
1185 m_freem(buf->m_head);
1186 buf->m_head = NULL;
1187 }
1188 ++txr->tx_avail;
1189 buf->eop = NULL;
1190
1191 }
1192 ++txr->packets;
1193 ++processed;
1194 ++ifp->if_opackets;
1195
1196 /* Try the next packet */
1197 ++txd;
1198 ++buf;
1199 ++work;
1200 /* reset with a wrap */
1201 if (__predict_false(!work)) {
1202 work -= txr->num_desc;
1203 buf = txr->tx_buffers;
1204 txd = txr->tx_base;
1205 }
1206 prefetch(txd);
1207 } while (__predict_true(--limit));
1208
1209 ixgbe_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1210 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1211
1212 work += txr->num_desc;
1213 txr->next_to_clean = work;
1214
1215 /*
1216  * Queue hang detection: we know there is work
1217  * outstanding or the early return above would
1218  * have been taken, so increment busy if nothing
1219  * managed to get cleaned. local_timer then
1220  * checks this counter and marks the queue as
1221  * HUNG if it exceeds a MAX number of attempts.
1222 */
1223 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1224 ++txr->busy;
1225 /*
1226  * If anything got cleaned we reset the state to 1;
1227  * note this will turn off HUNG if it's set.
1228 */
1229 if (processed)
1230 txr->busy = 1;
1231
1232 if (txr->tx_avail == txr->num_desc)
1233 txr->busy = 0;
1234
1235 return ((limit > 0) ? false : true);
1236 } /* ixgbe_txeof */
1237
1238 /************************************************************************
1239 * ixgbe_rsc_count
1240 *
1241 * Used to detect a descriptor that has been merged by Hardware RSC.
1242 ************************************************************************/
1243 static inline u32
1244 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1245 {
1246 return (le32toh(rx->wb.lower.lo_dword.data) &
1247 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1248 } /* ixgbe_rsc_count */
1249
1250 /************************************************************************
1251 * ixgbe_setup_hw_rsc
1252 *
1253  * Initialize the Hardware RSC (LRO) feature on 82599
1254  * for an RX ring; it is toggled by the LRO capability
1255 * even though it is transparent to the stack.
1256 *
1257 * NOTE: Since this HW feature only works with IPv4 and
1258 * testing has shown soft LRO to be as effective,
1259 * this feature will be disabled by default.
1260 ************************************************************************/
1261 static void
1262 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1263 {
1264 struct adapter *adapter = rxr->adapter;
1265 struct ixgbe_hw *hw = &adapter->hw;
1266 u32 rscctrl, rdrxctl;
1267
1268 /* If turning LRO/RSC off we need to disable it */
1269 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1270 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1271 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1272 return;
1273 }
1274
1275 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1276 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1277 #ifdef DEV_NETMAP
1278 /* Always strip CRC unless Netmap disabled it */
1279 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1280 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1281 ix_crcstrip)
1282 #endif /* DEV_NETMAP */
1283 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1284 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1285 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1286
1287 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1288 rscctrl |= IXGBE_RSCCTL_RSCEN;
1289 /*
1290 * Limit the total number of descriptors that
1291 * can be combined, so it does not exceed 64K
1292 */
1293 if (rxr->mbuf_sz == MCLBYTES)
1294 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1295 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1296 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1297 else if (rxr->mbuf_sz == MJUM9BYTES)
1298 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1299 else /* Using 16K cluster */
1300 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1301
1302 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1303
1304 /* Enable TCP header recognition */
1305 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1306 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1307
1308 /* Disable RSC for ACK packets */
1309 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1310 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1311
1312 rxr->hw_rsc = TRUE;
1313 } /* ixgbe_setup_hw_rsc */
1314
1315 /************************************************************************
1316 * ixgbe_refresh_mbufs
1317 *
1318  * Refresh mbuf buffers for RX descriptor rings.
1319  * - Keeps its own state, so discards due to resource
1320  * exhaustion are unnecessary. If an mbuf cannot be
1321  * obtained it just returns, keeping its placeholder;
1322  * it can simply be called again later to retry.
1323 ************************************************************************/
1324 static void
1325 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1326 {
1327 struct adapter *adapter = rxr->adapter;
1328 struct ixgbe_rx_buf *rxbuf;
1329 struct mbuf *mp;
1330 int i, j, error;
1331 bool refreshed = false;
1332
1333 i = j = rxr->next_to_refresh;
1334 /* Control the loop with one beyond */
1335 if (++j == rxr->num_desc)
1336 j = 0;
1337
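	/*
	 * 'i' is the slot being refreshed and 'j' runs one ahead of it;
	 * the loop stops when 'j' reaches the caller-supplied limit,
	 * leaving that slot untouched.
	 */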
1338 while (j != limit) {
1339 rxbuf = &rxr->rx_buffers[i];
1340 if (rxbuf->buf == NULL) {
1341 mp = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1342 MT_DATA, M_PKTHDR, rxr->mbuf_sz);
1343 if (mp == NULL) {
1344 rxr->no_jmbuf.ev_count++;
1345 goto update;
1346 }
1347 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1348 m_adj(mp, ETHER_ALIGN);
1349 } else
1350 mp = rxbuf->buf;
1351
1352 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1353
1354 /* If we're dealing with an mbuf that was copied rather
1355 * than replaced, there's no need to go through busdma.
1356 */
1357 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1358 /* Get the memory mapping */
1359 ixgbe_dmamap_unload(rxr->ptag, rxbuf->pmap);
1360 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat,
1361 rxbuf->pmap, mp, BUS_DMA_NOWAIT);
1362 if (error != 0) {
1363 device_printf(adapter->dev, "Refresh mbufs: "
1364 "payload dmamap load failure - %d\n",
1365 error);
1366 m_free(mp);
1367 rxbuf->buf = NULL;
1368 goto update;
1369 }
1370 rxbuf->buf = mp;
1371 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1372 0, mp->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1373 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1374 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1375 } else {
1376 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1377 rxbuf->flags &= ~IXGBE_RX_COPY;
1378 }
1379
1380 refreshed = true;
1381 /* Next is precalculated */
1382 i = j;
1383 rxr->next_to_refresh = i;
1384 if (++j == rxr->num_desc)
1385 j = 0;
1386 }
1387
1388 update:
1389 if (refreshed) /* Update hardware tail index */
1390 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1391
1392 return;
1393 } /* ixgbe_refresh_mbufs */
1394
1395 /************************************************************************
1396 * ixgbe_allocate_receive_buffers
1397 *
1398 * Allocate memory for rx_buffer structures. Since we use one
1399 * rx_buffer per received packet, the maximum number of rx_buffer's
1400 * that we'll need is equal to the number of receive descriptors
1401 * that we've allocated.
1402 ************************************************************************/
1403 static int
1404 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1405 {
1406 struct adapter *adapter = rxr->adapter;
1407 device_t dev = adapter->dev;
1408 struct ixgbe_rx_buf *rxbuf;
1409 int bsize, error;
1410
1411 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1412 rxr->rx_buffers = malloc(bsize, M_DEVBUF, M_WAITOK | M_ZERO);
1413
1414 error = ixgbe_dma_tag_create(
1415 /* parent */ adapter->osdep.dmat,
1416 /* alignment */ 1,
1417 /* bounds */ 0,
1418 /* maxsize */ MJUM16BYTES,
1419 /* nsegments */ 1,
1420 /* maxsegsize */ MJUM16BYTES,
1421 /* flags */ 0,
1422 &rxr->ptag);
1423 if (error != 0) {
1424 aprint_error_dev(dev, "Unable to create RX DMA tag\n");
1425 goto fail;
1426 }
1427
1428 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1429 rxbuf = &rxr->rx_buffers[i];
1430 error = ixgbe_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1431 if (error) {
1432 aprint_error_dev(dev, "Unable to create RX dma map\n");
1433 goto fail;
1434 }
1435 }
1436
1437 return (0);
1438
1439 fail:
1440 /* Frees all, but can handle partial completion */
1441 ixgbe_free_receive_structures(adapter);
1442
1443 return (error);
1444 } /* ixgbe_allocate_receive_buffers */
1445
1446 /************************************************************************
1447 * ixgbe_free_receive_ring
1448 ************************************************************************/
1449 static void
1450 ixgbe_free_receive_ring(struct rx_ring *rxr)
1451 {
1452 for (int i = 0; i < rxr->num_desc; i++) {
1453 ixgbe_rx_discard(rxr, i);
1454 }
1455 } /* ixgbe_free_receive_ring */
1456
1457 /************************************************************************
1458 * ixgbe_setup_receive_ring
1459 *
1460 * Initialize a receive ring and its buffers.
1461 ************************************************************************/
1462 static int
1463 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1464 {
1465 struct adapter *adapter;
1466 struct ixgbe_rx_buf *rxbuf;
1467 #ifdef LRO
1468 struct ifnet *ifp;
1469 struct lro_ctrl *lro = &rxr->lro;
1470 #endif /* LRO */
1471 #ifdef DEV_NETMAP
1472 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1473 struct netmap_slot *slot;
1474 #endif /* DEV_NETMAP */
1475 int rsize, error = 0;
1476
1477 adapter = rxr->adapter;
1478 #ifdef LRO
1479 ifp = adapter->ifp;
1480 #endif /* LRO */
1481
1482 /* Clear the ring contents */
1483 IXGBE_RX_LOCK(rxr);
1484
1485 #ifdef DEV_NETMAP
1486 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1487 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1488 #endif /* DEV_NETMAP */
1489
1490 rsize = roundup2(adapter->num_rx_desc *
1491 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1492 bzero((void *)rxr->rx_base, rsize);
1493 /* Cache the size */
1494 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1495
1496 /* Free current RX buffer structs and their mbufs */
1497 ixgbe_free_receive_ring(rxr);
1498
1499 IXGBE_RX_UNLOCK(rxr);
1500 /*
1501 * Now reinitialize our supply of jumbo mbufs. The number
1502 * or size of jumbo mbufs may have changed.
1503 * Assume all of rxr->ptag are the same.
1504 */
1505 ixgbe_jcl_reinit(adapter, rxr->ptag->dt_dmat, rxr,
1506 (2 * adapter->num_rx_desc), adapter->rx_mbuf_sz);
1507
1508 IXGBE_RX_LOCK(rxr);
1509
1510 /* Now replenish the mbufs */
1511 for (int j = 0; j != rxr->num_desc; ++j) {
1512 struct mbuf *mp;
1513
1514 rxbuf = &rxr->rx_buffers[j];
1515
1516 #ifdef DEV_NETMAP
1517 /*
1518 * In netmap mode, fill the map and set the buffer
1519 * address in the NIC ring, considering the offset
1520 * between the netmap and NIC rings (see comment in
1521 * ixgbe_setup_transmit_ring() ). No need to allocate
1522 * an mbuf, so end the block with a continue;
1523 */
1524 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1525 int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
1526 uint64_t paddr;
1527 void *addr;
1528
1529 addr = PNMB(na, slot + sj, &paddr);
1530 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1531 /* Update descriptor and the cached value */
1532 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1533 rxbuf->addr = htole64(paddr);
1534 continue;
1535 }
1536 #endif /* DEV_NETMAP */
1537
1538 rxbuf->flags = 0;
1539 rxbuf->buf = ixgbe_getjcl(&rxr->jcl_head, M_NOWAIT,
1540 MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz);
1541 if (rxbuf->buf == NULL) {
1542 error = ENOBUFS;
1543 goto fail;
1544 }
1545 mp = rxbuf->buf;
1546 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1547 /* Get the memory mapping */
1548 error = bus_dmamap_load_mbuf(rxr->ptag->dt_dmat, rxbuf->pmap,
1549 mp, BUS_DMA_NOWAIT);
1550 if (error != 0)
1551 goto fail;
1552 bus_dmamap_sync(rxr->ptag->dt_dmat, rxbuf->pmap,
1553 0, adapter->rx_mbuf_sz, BUS_DMASYNC_PREREAD);
1554 /* Update the descriptor and the cached value */
1555 rxr->rx_base[j].read.pkt_addr =
1556 htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1557 rxbuf->addr = htole64(rxbuf->pmap->dm_segs[0].ds_addr);
1558 }
1559
1560
1561 /* Setup our descriptor indices */
1562 rxr->next_to_check = 0;
1563 rxr->next_to_refresh = 0;
1564 rxr->lro_enabled = FALSE;
1565 rxr->rx_copies.ev_count = 0;
1566 #if 0 /* NetBSD */
1567 rxr->rx_bytes.ev_count = 0;
1568 #if 1 /* Fix inconsistency */
1569 rxr->rx_packets.ev_count = 0;
1570 #endif
1571 #endif
1572 rxr->vtag_strip = FALSE;
1573
1574 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1575 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1576
1577 /*
1578 * Now set up the LRO interface
1579 */
1580 if (ixgbe_rsc_enable)
1581 ixgbe_setup_hw_rsc(rxr);
1582 #ifdef LRO
1583 else if (ifp->if_capenable & IFCAP_LRO) {
1584 device_t dev = adapter->dev;
1585 int err = tcp_lro_init(lro);
1586 if (err) {
1587 device_printf(dev, "LRO Initialization failed!\n");
1588 goto fail;
1589 }
1590 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1591 rxr->lro_enabled = TRUE;
1592 lro->ifp = adapter->ifp;
1593 }
1594 #endif /* LRO */
1595
1596 IXGBE_RX_UNLOCK(rxr);
1597
1598 return (0);
1599
1600 fail:
1601 ixgbe_free_receive_ring(rxr);
1602 IXGBE_RX_UNLOCK(rxr);
1603
1604 return (error);
1605 } /* ixgbe_setup_receive_ring */
1606
1607 /************************************************************************
1608 * ixgbe_setup_receive_structures - Initialize all receive rings.
1609 ************************************************************************/
1610 int
1611 ixgbe_setup_receive_structures(struct adapter *adapter)
1612 {
1613 struct rx_ring *rxr = adapter->rx_rings;
1614 int j;
1615
1616 for (j = 0; j < adapter->num_queues; j++, rxr++)
1617 if (ixgbe_setup_receive_ring(rxr))
1618 goto fail;
1619
1620 return (0);
1621 fail:
1622 /*
1623  * Free RX buffers allocated so far; we will only handle
1624  * the rings that completed, since the failing one will
1625  * have cleaned up after itself. 'j' failed, so it is the terminus.
1626 */
1627 for (int i = 0; i < j; ++i) {
1628 rxr = &adapter->rx_rings[i];
1629 IXGBE_RX_LOCK(rxr);
1630 ixgbe_free_receive_ring(rxr);
1631 IXGBE_RX_UNLOCK(rxr);
1632 }
1633
1634 return (ENOBUFS);
1635 } /* ixgbe_setup_receive_structures */
1636
1637
1638 /************************************************************************
1639 * ixgbe_free_receive_structures - Free all receive rings.
1640 ************************************************************************/
1641 void
1642 ixgbe_free_receive_structures(struct adapter *adapter)
1643 {
1644 struct rx_ring *rxr = adapter->rx_rings;
1645
1646 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1647
1648 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1649 ixgbe_free_receive_buffers(rxr);
1650 #ifdef LRO
1651 /* Free LRO memory */
1652 tcp_lro_free(&rxr->lro);
1653 #endif /* LRO */
1654 /* Free the ring memory as well */
1655 ixgbe_dma_free(adapter, &rxr->rxdma);
1656 IXGBE_RX_LOCK_DESTROY(rxr);
1657 }
1658
1659 free(adapter->rx_rings, M_DEVBUF);
1660 } /* ixgbe_free_receive_structures */
1661
1662
1663 /************************************************************************
1664 * ixgbe_free_receive_buffers - Free receive ring data structures
1665 ************************************************************************/
1666 static void
1667 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1668 {
1669 struct adapter *adapter = rxr->adapter;
1670 struct ixgbe_rx_buf *rxbuf;
1671
1672 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1673
1674 /* Cleanup any existing buffers */
1675 if (rxr->rx_buffers != NULL) {
1676 for (int i = 0; i < adapter->num_rx_desc; i++) {
1677 rxbuf = &rxr->rx_buffers[i];
1678 ixgbe_rx_discard(rxr, i);
1679 if (rxbuf->pmap != NULL) {
1680 ixgbe_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1681 rxbuf->pmap = NULL;
1682 }
1683 }
1684
1685 /* NetBSD specific. See ixgbe_netbsd.c */
1686 ixgbe_jcl_destroy(adapter, rxr);
1687
1688 if (rxr->rx_buffers != NULL) {
1689 free(rxr->rx_buffers, M_DEVBUF);
1690 rxr->rx_buffers = NULL;
1691 }
1692 }
1693
1694 if (rxr->ptag != NULL) {
1695 ixgbe_dma_tag_destroy(rxr->ptag);
1696 rxr->ptag = NULL;
1697 }
1698
1699 return;
1700 } /* ixgbe_free_receive_buffers */
1701
1702 /************************************************************************
1703 * ixgbe_rx_input
1704 ************************************************************************/
1705 static __inline void
1706 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1707 u32 ptype)
1708 {
1709 struct adapter *adapter = ifp->if_softc;
1710
1711 #ifdef LRO
1712 struct ethercom *ec = &adapter->osdep.ec;
1713
1714 /*
1715  * At the moment LRO is only for IP/TCP packets whose TCP checksum
1716  * has been verified by hardware, and there must be no VLAN tag in
1717  * the ethernet header. For IPv6 we do not yet support extension headers.
1718 */
1719 if (rxr->lro_enabled &&
1720 (ec->ec_capenable & ETHERCAP_VLAN_HWTAGGING) != 0 &&
1721 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1722 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1723 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1724 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1725 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1726 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1727 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1728 /*
1729 * Send to the stack if:
1730  * - LRO not enabled, or
1731  * - no LRO resources, or
1732  * - lro enqueue fails
1733 */
1734 if (rxr->lro.lro_cnt != 0)
1735 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1736 return;
1737 }
1738 #endif /* LRO */
1739
1740 if_percpuq_enqueue(adapter->ipq, m);
1741 } /* ixgbe_rx_input */
1742
1743 /************************************************************************
1744 * ixgbe_rx_discard
1745 ************************************************************************/
1746 static __inline void
1747 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1748 {
1749 struct ixgbe_rx_buf *rbuf;
1750
1751 rbuf = &rxr->rx_buffers[i];
1752
1753 /*
1754 * With advanced descriptors the writeback
1755  * clobbers the buffer addrs, so it's easier
1756 * to just free the existing mbufs and take
1757 * the normal refresh path to get new buffers
1758 * and mapping.
1759 */
1760
1761 if (rbuf->fmp != NULL) {/* Partial chain ? */
1762 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1763 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1764 m_freem(rbuf->fmp);
1765 rbuf->fmp = NULL;
1766 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1767 } else if (rbuf->buf) {
1768 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1769 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1770 m_free(rbuf->buf);
1771 rbuf->buf = NULL;
1772 }
1773 ixgbe_dmamap_unload(rxr->ptag, rbuf->pmap);
1774
1775 rbuf->flags = 0;
1776
1777 return;
1778 } /* ixgbe_rx_discard */
1779
1780
1781 /************************************************************************
1782 * ixgbe_rxeof
1783 *
1784 * Executes in interrupt context. It replenishes the
1785  * mbufs in the descriptor ring and sends data that has
1786  * been DMA'd into host memory to the upper layer.
1787 *
1788 * Return TRUE for more work, FALSE for all clean.
1789 ************************************************************************/
1790 bool
1791 ixgbe_rxeof(struct ix_queue *que)
1792 {
1793 struct adapter *adapter = que->adapter;
1794 struct rx_ring *rxr = que->rxr;
1795 struct ifnet *ifp = adapter->ifp;
1796 #ifdef LRO
1797 struct lro_ctrl *lro = &rxr->lro;
1798 #endif /* LRO */
1799 union ixgbe_adv_rx_desc *cur;
1800 struct ixgbe_rx_buf *rbuf, *nbuf;
1801 int i, nextp, processed = 0;
1802 u32 staterr = 0;
1803 u32 count = adapter->rx_process_limit;
1804 #ifdef RSS
1805 u16 pkt_info;
1806 #endif
1807
1808 IXGBE_RX_LOCK(rxr);
1809
1810 #ifdef DEV_NETMAP
1811 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1812 /* Same as the txeof routine: wakeup clients on intr. */
1813 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1814 IXGBE_RX_UNLOCK(rxr);
1815 return (FALSE);
1816 }
1817 }
1818 #endif /* DEV_NETMAP */
1819
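	/*
	 * Main cleanup loop: scan descriptors starting at next_to_check and
	 * stop once the DD (descriptor done) bit is clear or the per-call
	 * budget (rx_process_limit) is exhausted.
	 */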
1820 for (i = rxr->next_to_check; count != 0;) {
1821 struct mbuf *sendmp, *mp;
1822 u32 rsc, ptype;
1823 u16 len;
1824 u16 vtag = 0;
1825 bool eop;
1826
1827 /* Sync the ring. */
1828 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1829 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1830
1831 cur = &rxr->rx_base[i];
1832 staterr = le32toh(cur->wb.upper.status_error);
1833 #ifdef RSS
1834 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1835 #endif
1836
1837 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1838 break;
1839
1840 count--;
1841 sendmp = NULL;
1842 nbuf = NULL;
1843 rsc = 0;
1844 cur->wb.upper.status_error = 0;
1845 rbuf = &rxr->rx_buffers[i];
1846 mp = rbuf->buf;
1847
1848 len = le16toh(cur->wb.upper.length);
1849 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1850 IXGBE_RXDADV_PKTTYPE_MASK;
1851 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1852
1853 /* Make sure bad packets are discarded */
1854 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1855 #if __FreeBSD_version >= 1100036
1856 if (adapter->feat_en & IXGBE_FEATURE_VF)
1857 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1858 #endif
1859 rxr->rx_discarded.ev_count++;
1860 ixgbe_rx_discard(rxr, i);
1861 goto next_desc;
1862 }
1863
1864 bus_dmamap_sync(rxr->ptag->dt_dmat, rbuf->pmap, 0,
1865 rbuf->buf->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
1866
1867 		/*
1868 		 * On the 82599, which supports hardware
1869 		 * LRO (called HW RSC), the fragments of a
1870 		 * packet need not occupy sequential
1871 		 * descriptors; instead, the next descriptor
1872 		 * is indicated by bits in the current one.
1873 		 * This also means that we might process
1874 		 * more than one packet at a time, something
1875 		 * that was never true before and that
1876 		 * required eliminating global chain pointers
1877 		 * in favor of what we are doing here. -jfv
1878 		 */
1879 if (!eop) {
1880 /*
1881 * Figure out the next descriptor
1882 * of this frame.
1883 */
1884 if (rxr->hw_rsc == TRUE) {
1885 rsc = ixgbe_rsc_count(cur);
1886 rxr->rsc_num += (rsc - 1);
1887 }
1888 if (rsc) { /* Get hardware index */
1889 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1890 IXGBE_RXDADV_NEXTP_SHIFT);
1891 } else { /* Just sequential */
1892 nextp = i + 1;
1893 if (nextp == adapter->num_rx_desc)
1894 nextp = 0;
1895 }
1896 nbuf = &rxr->rx_buffers[nextp];
1897 prefetch(nbuf);
1898 }
1899 /*
1900 * Rather than using the fmp/lmp global pointers
1901 * we now keep the head of a packet chain in the
1902 * buffer struct and pass this along from one
1903 * descriptor to the next, until we get EOP.
1904 */
1905 mp->m_len = len;
1906 /*
1907 		 * See if there is a stored head from a previous
1908 		 * descriptor; if so, this buffer continues that frame.
1909 */
1910 sendmp = rbuf->fmp;
1911 if (sendmp != NULL) { /* secondary frag */
1912 rbuf->buf = rbuf->fmp = NULL;
1913 mp->m_flags &= ~M_PKTHDR;
1914 sendmp->m_pkthdr.len += mp->m_len;
1915 } else {
1916 /*
1917 * Optimize. This might be a small packet,
1918 * maybe just a TCP ACK. Do a fast copy that
1919 * is cache aligned into a new mbuf, and
1920 * leave the old mbuf+cluster for re-use.
1921 */
1922 if (eop && len <= IXGBE_RX_COPY_LEN) {
1923 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1924 if (sendmp != NULL) {
1925 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1926 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1927 len);
1928 sendmp->m_len = len;
1929 rxr->rx_copies.ev_count++;
1930 rbuf->flags |= IXGBE_RX_COPY;
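					/*
					 * IXGBE_RX_COPY lets the refresh
					 * path reuse this cluster and its
					 * DMA mapping instead of loading
					 * a new one.
					 */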
1931 }
1932 }
1933 if (sendmp == NULL) {
1934 rbuf->buf = rbuf->fmp = NULL;
1935 sendmp = mp;
1936 }
1937
1938 			/* First descriptor of a (non-packet-split) chain */
1939 sendmp->m_flags |= M_PKTHDR;
1940 sendmp->m_pkthdr.len = mp->m_len;
1941 }
1942 ++processed;
1943
1944 /* Pass the head pointer on */
1945 if (eop == 0) {
1946 nbuf->fmp = sendmp;
1947 sendmp = NULL;
1948 mp->m_next = nbuf->buf;
1949 } else { /* Sending this frame */
1950 m_set_rcvif(sendmp, ifp);
1951 ++rxr->packets;
1952 rxr->rx_packets.ev_count++;
1953 			/* Capture data for AIM (adaptive interrupt moderation) */
1954 rxr->bytes += sendmp->m_pkthdr.len;
1955 rxr->rx_bytes.ev_count += sendmp->m_pkthdr.len;
1956 /* Process vlan info */
1957 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1958 vtag = le16toh(cur->wb.upper.vlan);
1959 if (vtag) {
1960 vlan_set_tag(sendmp, vtag);
1961 }
1962 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {
1963 ixgbe_rx_checksum(staterr, sendmp, ptype,
1964 &adapter->stats.pf);
1965 }
1966
1967 #if 0 /* FreeBSD */
1968 /*
1969 * In case of multiqueue, we have RXCSUM.PCSD bit set
1970 * and never cleared. This means we have RSS hash
1971 * available to be used.
1972 */
1973 if (adapter->num_queues > 1) {
1974 sendmp->m_pkthdr.flowid =
1975 le32toh(cur->wb.lower.hi_dword.rss);
1976 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1977 case IXGBE_RXDADV_RSSTYPE_IPV4:
1978 M_HASHTYPE_SET(sendmp,
1979 M_HASHTYPE_RSS_IPV4);
1980 break;
1981 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1982 M_HASHTYPE_SET(sendmp,
1983 M_HASHTYPE_RSS_TCP_IPV4);
1984 break;
1985 case IXGBE_RXDADV_RSSTYPE_IPV6:
1986 M_HASHTYPE_SET(sendmp,
1987 M_HASHTYPE_RSS_IPV6);
1988 break;
1989 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1990 M_HASHTYPE_SET(sendmp,
1991 M_HASHTYPE_RSS_TCP_IPV6);
1992 break;
1993 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1994 M_HASHTYPE_SET(sendmp,
1995 M_HASHTYPE_RSS_IPV6_EX);
1996 break;
1997 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1998 M_HASHTYPE_SET(sendmp,
1999 M_HASHTYPE_RSS_TCP_IPV6_EX);
2000 break;
2001 #if __FreeBSD_version > 1100000
2002 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
2003 M_HASHTYPE_SET(sendmp,
2004 M_HASHTYPE_RSS_UDP_IPV4);
2005 break;
2006 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
2007 M_HASHTYPE_SET(sendmp,
2008 M_HASHTYPE_RSS_UDP_IPV6);
2009 break;
2010 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
2011 M_HASHTYPE_SET(sendmp,
2012 M_HASHTYPE_RSS_UDP_IPV6_EX);
2013 break;
2014 #endif
2015 default:
2016 M_HASHTYPE_SET(sendmp,
2017 M_HASHTYPE_OPAQUE_HASH);
2018 }
2019 } else {
2020 sendmp->m_pkthdr.flowid = que->msix;
2021 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
2022 }
2023 #endif
2024 }
2025 next_desc:
2026 ixgbe_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
2027 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2028
2029 /* Advance our pointers to the next descriptor. */
2030 if (++i == rxr->num_desc)
2031 i = 0;
2032
2033 /* Now send to the stack or do LRO */
2034 if (sendmp != NULL) {
2035 rxr->next_to_check = i;
2036 IXGBE_RX_UNLOCK(rxr);
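			/*
			 * The ring lock is not held while the frame is handed
			 * to the stack; next_to_check was published above and
			 * is re-read once the lock is reacquired.
			 */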
2037 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
2038 IXGBE_RX_LOCK(rxr);
2039 i = rxr->next_to_check;
2040 }
2041
2042 		/* Refresh mbufs every eight processed descriptors to keep the ring supplied. */
2043 if (processed == 8) {
2044 ixgbe_refresh_mbufs(rxr, i);
2045 processed = 0;
2046 }
2047 }
2048
2049 /* Refresh any remaining buf structs */
2050 if (ixgbe_rx_unrefreshed(rxr))
2051 ixgbe_refresh_mbufs(rxr, i);
2052
2053 rxr->next_to_check = i;
2054
2055 IXGBE_RX_UNLOCK(rxr);
2056
2057 #ifdef LRO
2058 /*
2059 * Flush any outstanding LRO work
2060 */
2061 tcp_lro_flush_all(lro);
2062 #endif /* LRO */
2063
2064 /*
2065 * Still have cleaning to do?
2066 */
2067 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2068 return (TRUE);
2069
2070 return (FALSE);
2071 } /* ixgbe_rxeof */
2072
2073
2074 /************************************************************************
2075 * ixgbe_rx_checksum
2076 *
2077 * Verify that the hardware indicated that the checksum is valid.
2078  * Inform the stack about the status of the checksum so that the
2079  * stack doesn't spend time verifying it.
2080 ************************************************************************/
2081 static void
2082 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype,
2083 struct ixgbe_hw_stats *stats)
2084 {
2085 u16 status = (u16)staterr;
2086 u8 errors = (u8)(staterr >> 24);
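	/*
	 * The status bits tested below sit in the low 16 bits of staterr;
	 * the error bits sit in bits 24-31.
	 */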
2087 #if 0
2088 bool sctp = false;
2089
2090 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2091 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2092 sctp = true;
2093 #endif
2094
2095 /* IPv4 checksum */
2096 if (status & IXGBE_RXD_STAT_IPCS) {
2097 stats->ipcs.ev_count++;
2098 if (!(errors & IXGBE_RXD_ERR_IPE)) {
2099 /* IP Checksum Good */
2100 mp->m_pkthdr.csum_flags = M_CSUM_IPv4;
2101 } else {
2102 stats->ipcs_bad.ev_count++;
2103 mp->m_pkthdr.csum_flags = M_CSUM_IPv4|M_CSUM_IPv4_BAD;
2104 }
2105 }
2106 /* TCP/UDP/SCTP checksum */
2107 if (status & IXGBE_RXD_STAT_L4CS) {
2108 		int type = M_CSUM_TCPv4|M_CSUM_TCPv6|M_CSUM_UDPv4|M_CSUM_UDPv6;
2109 		stats->l4cs.ev_count++;
2110 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2111 mp->m_pkthdr.csum_flags |= type;
2112 } else {
2113 stats->l4cs_bad.ev_count++;
2114 mp->m_pkthdr.csum_flags |= type | M_CSUM_TCP_UDP_BAD;
2115 }
2116 }
2117 } /* ixgbe_rx_checksum */
2118
2119 /************************************************************************
2120 * ixgbe_dma_malloc
2121 ************************************************************************/
2122 int
2123 ixgbe_dma_malloc(struct adapter *adapter, const bus_size_t size,
2124 struct ixgbe_dma_alloc *dma, const int mapflags)
2125 {
2126 device_t dev = adapter->dev;
2127 int r, rsegs;
2128
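	/*
	 * DMA setup is done in five steps: create a DMA tag, allocate the
	 * backing memory, map it into kernel virtual address space, create
	 * a DMA map, and load that map with the buffer.  The fail_* labels
	 * below unwind whatever steps have completed.
	 */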
2129 r = ixgbe_dma_tag_create(
2130 /* parent */ adapter->osdep.dmat,
2131 /* alignment */ DBA_ALIGN,
2132 /* bounds */ 0,
2133 /* maxsize */ size,
2134 /* nsegments */ 1,
2135 /* maxsegsize */ size,
2136 /* flags */ BUS_DMA_ALLOCNOW,
2137 &dma->dma_tag);
2138 if (r != 0) {
2139 aprint_error_dev(dev,
2140 "%s: ixgbe_dma_tag_create failed; error %d\n", __func__,
2141 r);
2142 goto fail_0;
2143 }
2144
2145 r = bus_dmamem_alloc(dma->dma_tag->dt_dmat, size,
2146 dma->dma_tag->dt_alignment, dma->dma_tag->dt_boundary,
2147 &dma->dma_seg, 1, &rsegs, BUS_DMA_NOWAIT);
2148 if (r != 0) {
2149 aprint_error_dev(dev,
2150 "%s: bus_dmamem_alloc failed; error %d\n", __func__, r);
2151 goto fail_1;
2152 }
2153
2154 r = bus_dmamem_map(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs,
2155 size, &dma->dma_vaddr, BUS_DMA_NOWAIT);
2156 if (r != 0) {
2157 aprint_error_dev(dev, "%s: bus_dmamem_map failed; error %d\n",
2158 __func__, r);
2159 goto fail_2;
2160 }
2161
2162 r = ixgbe_dmamap_create(dma->dma_tag, 0, &dma->dma_map);
2163 if (r != 0) {
2164 		aprint_error_dev(dev, "%s: ixgbe_dmamap_create failed; error %d\n",
2165 __func__, r);
2166 goto fail_3;
2167 }
2168
2169 r = bus_dmamap_load(dma->dma_tag->dt_dmat, dma->dma_map,
2170 dma->dma_vaddr, size, NULL, mapflags | BUS_DMA_NOWAIT);
2171 if (r != 0) {
2172 aprint_error_dev(dev, "%s: bus_dmamap_load failed; error %d\n",
2173 __func__, r);
2174 goto fail_4;
2175 }
2176 dma->dma_paddr = dma->dma_map->dm_segs[0].ds_addr;
2177 dma->dma_size = size;
2178 return 0;
2179 fail_4:
2180 ixgbe_dmamap_destroy(dma->dma_tag, dma->dma_map);
2181 fail_3:
2182 bus_dmamem_unmap(dma->dma_tag->dt_dmat, dma->dma_vaddr, size);
2183 fail_2:
2184 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, rsegs);
2185 fail_1:
2186 ixgbe_dma_tag_destroy(dma->dma_tag);
2187 fail_0:
2188
2189 return (r);
2190 } /* ixgbe_dma_malloc */
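
/*
 * Typical usage, as in ixgbe_allocate_queues() below: allocate a
 * descriptor area and release it with ixgbe_dma_free() on teardown
 * or on an error path.
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT))
 *		error = ENOMEM;		(and unwind)
 *	...
 *	ixgbe_dma_free(adapter, &txr->txdma);
 */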
2191
2192 /************************************************************************
2193 * ixgbe_dma_free
2194 ************************************************************************/
2195 void
2196 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2197 {
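	/*
	 * Release what ixgbe_dma_malloc() set up: sync and unload the map,
	 * free the memory segment, and destroy the tag.
	 */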
2198 bus_dmamap_sync(dma->dma_tag->dt_dmat, dma->dma_map, 0, dma->dma_size,
2199 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2200 ixgbe_dmamap_unload(dma->dma_tag, dma->dma_map);
2201 bus_dmamem_free(dma->dma_tag->dt_dmat, &dma->dma_seg, 1);
2202 ixgbe_dma_tag_destroy(dma->dma_tag);
2203 } /* ixgbe_dma_free */
2204
2205
2206 /************************************************************************
2207 * ixgbe_allocate_queues
2208 *
2209 * Allocate memory for the transmit and receive rings, and then
2210  * the descriptors associated with each.  Called only once at attach.
2211 ************************************************************************/
2212 int
2213 ixgbe_allocate_queues(struct adapter *adapter)
2214 {
2215 device_t dev = adapter->dev;
2216 struct ix_queue *que;
2217 struct tx_ring *txr;
2218 struct rx_ring *rxr;
2219 int rsize, tsize, error = IXGBE_SUCCESS;
2220 int txconf = 0, rxconf = 0;
2221
2222 /* First, allocate the top level queue structs */
2223 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2224 adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2225
2226 /* Second, allocate the TX ring struct memory */
2227 adapter->tx_rings = malloc(sizeof(struct tx_ring) *
2228 adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2229
2230 /* Third, allocate the RX ring */
2231 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2232 adapter->num_queues, M_DEVBUF, M_WAITOK | M_ZERO);
2233
2234 	/* Descriptor area size for one TX ring, rounded up to DBA_ALIGN */
2235 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2236 DBA_ALIGN);
2237
2238 /*
2239 	 * Now set up the TX queues. txconf counts how many rings have been
2240 	 * configured so that, if something fails midway, the error path can
2241 	 * unwind the allocations gracefully.
2242 */
2243 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2244 /* Set up some basics */
2245 txr = &adapter->tx_rings[i];
2246 txr->adapter = adapter;
2247 txr->txr_interq = NULL;
2248 /* In case SR-IOV is enabled, align the index properly */
2249 #ifdef PCI_IOV
2250 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2251 i);
2252 #else
2253 txr->me = i;
2254 #endif
2255 txr->num_desc = adapter->num_tx_desc;
2256
2257 /* Initialize the TX side lock */
2258 mutex_init(&txr->tx_mtx, MUTEX_DEFAULT, IPL_NET);
2259
2260 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2261 BUS_DMA_NOWAIT)) {
2262 aprint_error_dev(dev,
2263 "Unable to allocate TX Descriptor memory\n");
2264 error = ENOMEM;
2265 goto err_tx_desc;
2266 }
2267 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2268 bzero((void *)txr->tx_base, tsize);
2269
2270 /* Now allocate transmit buffers for the ring */
2271 if (ixgbe_allocate_transmit_buffers(txr)) {
2272 aprint_error_dev(dev,
2273 "Critical Failure setting up transmit buffers\n");
2274 error = ENOMEM;
2275 goto err_tx_desc;
2276 }
2277 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2278 /* Allocate a buf ring */
2279 txr->txr_interq = pcq_create(IXGBE_BR_SIZE, KM_SLEEP);
2280 if (txr->txr_interq == NULL) {
2281 aprint_error_dev(dev,
2282 "Critical Failure setting up buf ring\n");
2283 error = ENOMEM;
2284 goto err_tx_desc;
2285 }
2286 }
2287 }
2288
2289 /*
2290 * Next the RX queues...
2291 */
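	/* rxconf plays the same unwinding role for RX rings as txconf above. */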
2292 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2293 DBA_ALIGN);
2294 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2295 rxr = &adapter->rx_rings[i];
2296 /* Set up some basics */
2297 rxr->adapter = adapter;
2298 #ifdef PCI_IOV
2299 /* In case SR-IOV is enabled, align the index properly */
2300 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2301 i);
2302 #else
2303 rxr->me = i;
2304 #endif
2305 rxr->num_desc = adapter->num_rx_desc;
2306
2307 /* Initialize the RX side lock */
2308 mutex_init(&rxr->rx_mtx, MUTEX_DEFAULT, IPL_NET);
2309
2310 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2311 BUS_DMA_NOWAIT)) {
2312 aprint_error_dev(dev,
2313 			    "Unable to allocate RX Descriptor memory\n");
2314 error = ENOMEM;
2315 goto err_rx_desc;
2316 }
2317 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2318 bzero((void *)rxr->rx_base, rsize);
2319
2320 /* Allocate receive buffers for the ring */
2321 if (ixgbe_allocate_receive_buffers(rxr)) {
2322 aprint_error_dev(dev,
2323 "Critical Failure setting up receive buffers\n");
2324 error = ENOMEM;
2325 goto err_rx_desc;
2326 }
2327 }
2328
2329 /*
2330 * Finally set up the queue holding structs
2331 */
2332 for (int i = 0; i < adapter->num_queues; i++) {
2333 que = &adapter->queues[i];
2334 que->adapter = adapter;
2335 que->me = i;
2336 que->txr = &adapter->tx_rings[i];
2337 que->rxr = &adapter->rx_rings[i];
2338
2339 mutex_init(&que->dc_mtx, MUTEX_DEFAULT, IPL_NET);
2340 que->disabled_count = 0;
2341 }
2342
2343 return (0);
2344
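/*
 * Error unwinding: free the descriptor DMA areas of every ring that was
 * set up (tracked by rxconf and txconf), then release the ring and queue
 * arrays themselves.
 */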
2345 err_rx_desc:
2346 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2347 ixgbe_dma_free(adapter, &rxr->rxdma);
2348 err_tx_desc:
2349 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2350 ixgbe_dma_free(adapter, &txr->txdma);
2351 free(adapter->rx_rings, M_DEVBUF);
2352 free(adapter->tx_rings, M_DEVBUF);
2353 free(adapter->queues, M_DEVBUF);
2354 return (error);
2355 } /* ixgbe_allocate_queues */
2356
2357 /************************************************************************
2358 * ixgbe_free_queues
2359 *
2360 * Free descriptors for the transmit and receive rings, and then
2361 * the memory associated with each.
2362 ************************************************************************/
2363 void
2364 ixgbe_free_queues(struct adapter *adapter)
2365 {
2366 struct ix_queue *que;
2367 int i;
2368
2369 ixgbe_free_transmit_structures(adapter);
2370 ixgbe_free_receive_structures(adapter);
2371 for (i = 0; i < adapter->num_queues; i++) {
2372 que = &adapter->queues[i];
2373 mutex_destroy(&que->dc_mtx);
2374 }
2375 free(adapter->queues, M_DEVBUF);
2376 } /* ixgbe_free_queues */
2377